llvm-project/llvm/test/CodeGen/X86/pmulh.ll

351 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 --check-prefix=SSE2-PROMOTE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 --check-prefix=SSE2-WIDEN
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 --check-prefix=SSE41-PROMOTE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 --check-prefix=SSE41-WIDEN
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
; SSE2-PROMOTE-LABEL: mulhuw_v4i16:
; SSE2-PROMOTE: # %bb.0:
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-PROMOTE-NEXT: pmulhuw %xmm1, %xmm0
; SSE2-PROMOTE-NEXT: pxor %xmm1, %xmm1
; SSE2-PROMOTE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-PROMOTE-NEXT: retq
;
; SSE2-WIDEN-LABEL: mulhuw_v4i16:
; SSE2-WIDEN: # %bb.0:
; SSE2-WIDEN-NEXT: pmulhuw %xmm1, %xmm0
; SSE2-WIDEN-NEXT: retq
;
; SSE41-PROMOTE-LABEL: mulhuw_v4i16:
; SSE41-PROMOTE: # %bb.0:
; SSE41-PROMOTE-NEXT: pxor %xmm2, %xmm2
; SSE41-PROMOTE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-PROMOTE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-PROMOTE-NEXT: pmulld %xmm1, %xmm0
; SSE41-PROMOTE-NEXT: psrld $16, %xmm0
; SSE41-PROMOTE-NEXT: retq
;
; SSE41-WIDEN-LABEL: mulhuw_v4i16:
; SSE41-WIDEN: # %bb.0:
; SSE41-WIDEN-NEXT: pmulhuw %xmm1, %xmm0
; SSE41-WIDEN-NEXT: retq
;
; AVX-LABEL: mulhuw_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: retq
%a1 = zext <4 x i16> %a to <4 x i32>
%b1 = zext <4 x i16> %b to <4 x i32>
%c = mul <4 x i32> %a1, %b1
%d = lshr <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
%e = trunc <4 x i32> %d to <4 x i16>
ret <4 x i16> %e
}
define <4 x i16> @mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
; SSE2-PROMOTE-LABEL: mulhw_v4i16:
; SSE2-PROMOTE: # %bb.0:
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-PROMOTE-NEXT: pmulhw %xmm1, %xmm0
; SSE2-PROMOTE-NEXT: pxor %xmm1, %xmm1
; SSE2-PROMOTE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-PROMOTE-NEXT: retq
;
; SSE2-WIDEN-LABEL: mulhw_v4i16:
; SSE2-WIDEN: # %bb.0:
; SSE2-WIDEN-NEXT: pmulhw %xmm1, %xmm0
; SSE2-WIDEN-NEXT: retq
;
; SSE41-PROMOTE-LABEL: mulhw_v4i16:
; SSE41-PROMOTE: # %bb.0:
; SSE41-PROMOTE-NEXT: pslld $16, %xmm0
; SSE41-PROMOTE-NEXT: psrad $16, %xmm0
; SSE41-PROMOTE-NEXT: pslld $16, %xmm1
; SSE41-PROMOTE-NEXT: psrad $16, %xmm1
; SSE41-PROMOTE-NEXT: pmulld %xmm1, %xmm0
; SSE41-PROMOTE-NEXT: psrld $16, %xmm0
; SSE41-PROMOTE-NEXT: retq
;
; SSE41-WIDEN-LABEL: mulhw_v4i16:
; SSE41-WIDEN: # %bb.0:
; SSE41-WIDEN-NEXT: pmulhw %xmm1, %xmm0
; SSE41-WIDEN-NEXT: retq
;
; AVX-LABEL: mulhw_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $16, %xmm0, %xmm0
; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX-NEXT: vpslld $16, %xmm1, %xmm1
; AVX-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: retq
%a1 = sext <4 x i16> %a to <4 x i32>
%b1 = sext <4 x i16> %b to <4 x i32>
%c = mul <4 x i32> %a1, %b1
%d = lshr <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
%e = trunc <4 x i32> %d to <4 x i16>
ret <4 x i16> %e
}
define <8 x i16> @mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: mulhuw_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mulhuw_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%a1 = zext <8 x i16> %a to <8 x i32>
%b1 = zext <8 x i16> %b to <8 x i32>
%c = mul <8 x i32> %a1, %b1
%d = lshr <8 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <8 x i32> %d to <8 x i16>
ret <8 x i16> %e
}
define <8 x i16> @mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: mulhw_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mulhw_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%a1 = sext <8 x i16> %a to <8 x i32>
%b1 = sext <8 x i16> %b to <8 x i32>
%c = mul <8 x i32> %a1, %b1
%d = lshr <8 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <8 x i32> %d to <8 x i16>
ret <8 x i16> %e
}
define <16 x i16> @mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: mulhuw_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm2, %xmm0
; SSE-NEXT: pmulhuw %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: mulhuw_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%a1 = zext <16 x i16> %a to <16 x i32>
%b1 = zext <16 x i16> %b to <16 x i32>
%c = mul <16 x i32> %a1, %b1
%d = lshr <16 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <16 x i32> %d to <16 x i16>
ret <16 x i16> %e
}
define <16 x i16> @mulhw_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: mulhw_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm2, %xmm0
; SSE-NEXT: pmulhw %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: mulhw_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%a1 = sext <16 x i16> %a to <16 x i32>
%b1 = sext <16 x i16> %b to <16 x i32>
%c = mul <16 x i32> %a1, %b1
%d = lshr <16 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <16 x i32> %d to <16 x i16>
ret <16 x i16> %e
}
define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
; SSE-LABEL: mulhuw_v32i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm4, %xmm0
; SSE-NEXT: pmulhuw %xmm5, %xmm1
; SSE-NEXT: pmulhuw %xmm6, %xmm2
; SSE-NEXT: pmulhuw %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX2-LABEL: mulhuw_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulhuw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mulhuw_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmulhuw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mulhuw_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%a1 = zext <32 x i16> %a to <32 x i32>
%b1 = zext <32 x i16> %b to <32 x i32>
%c = mul <32 x i32> %a1, %b1
%d = lshr <32 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <32 x i32> %d to <32 x i16>
ret <32 x i16> %e
}
define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
; SSE-LABEL: mulhw_v32i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm4, %xmm0
; SSE-NEXT: pmulhw %xmm5, %xmm1
; SSE-NEXT: pmulhw %xmm6, %xmm2
; SSE-NEXT: pmulhw %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX2-LABEL: mulhw_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulhw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mulhw_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmulhw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mulhw_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%a1 = sext <32 x i16> %a to <32 x i32>
%b1 = sext <32 x i16> %b to <32 x i32>
%c = mul <32 x i32> %a1, %b1
%d = lshr <32 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <32 x i32> %d to <32 x i16>
ret <32 x i16> %e
}
define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; SSE-LABEL: mulhuw_v64i16:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movdqa %xmm7, 112(%rdi)
; SSE-NEXT: movdqa %xmm6, 96(%rdi)
; SSE-NEXT: movdqa %xmm5, 80(%rdi)
; SSE-NEXT: movdqa %xmm4, 64(%rdi)
; SSE-NEXT: movdqa %xmm3, 48(%rdi)
; SSE-NEXT: movdqa %xmm2, 32(%rdi)
; SSE-NEXT: movdqa %xmm1, 16(%rdi)
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: mulhuw_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpmulhuw %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpmulhuw %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpmulhuw %ymm7, %ymm3, %ymm3
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mulhuw_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpmulhuw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmulhuw %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpmulhuw %ymm7, %ymm3, %ymm3
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mulhuw_v64i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmulhuw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
%a1 = zext <64 x i16> %a to <64 x i32>
%b1 = zext <64 x i16> %b to <64 x i32>
%c = mul <64 x i32> %a1, %b1
%d = lshr <64 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <64 x i32> %d to <64 x i16>
ret <64 x i16> %e
}
define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; SSE-LABEL: mulhw_v64i16:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movdqa %xmm7, 112(%rdi)
; SSE-NEXT: movdqa %xmm6, 96(%rdi)
; SSE-NEXT: movdqa %xmm5, 80(%rdi)
; SSE-NEXT: movdqa %xmm4, 64(%rdi)
; SSE-NEXT: movdqa %xmm3, 48(%rdi)
; SSE-NEXT: movdqa %xmm2, 32(%rdi)
; SSE-NEXT: movdqa %xmm1, 16(%rdi)
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: mulhw_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpmulhw %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpmulhw %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpmulhw %ymm7, %ymm3, %ymm3
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mulhw_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpmulhw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmulhw %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpmulhw %ymm7, %ymm3, %ymm3
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mulhw_v64i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
%a1 = sext <64 x i16> %a to <64 x i32>
%b1 = sext <64 x i16> %b to <64 x i32>
%c = mul <64 x i32> %a1, %b1
%d = lshr <64 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%e = trunc <64 x i32> %d to <64 x i16>
ret <64 x i16> %e
}