AVX-512: Implemented all forms of sign-extend and zero-extend instructions for KNL and SKX

Implemented DAG lowering for all these forms.
Added tests for DAG lowering and encoding.

By Igor Breger (igor.breger@intel.com)

llvm-svn: 238301
This commit is contained in:
Elena Demikhovsky 2015-05-27 08:15:19 +00:00
parent 792b94c432
commit 3948c590e3
7 changed files with 1915 additions and 146 deletions

View File

@ -1261,6 +1261,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
@ -1479,7 +1492,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
@ -12093,13 +12110,13 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
SelectionDAG &DAG) {
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
@ -12137,7 +12154,7 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, DAG);
return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@ -13876,7 +13893,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
@ -13902,7 +13920,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {

View File

@ -5261,79 +5261,146 @@ def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
(VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
PatFrag mem_frag, X86MemOperand x86memop,
ValueType OpVT, ValueType InVT> {
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins SrcRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins KRC:$mask, SrcRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins KRC:$mask, SrcRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins x86memop:$src),
!strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
EVEX;
def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
[]>,
EVEX, EVEX_K;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>,
EVEX, EVEX_KZ;
let mayLoad = 1 in {
defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins x86memop:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (LdFrag addr:$src))>,
EVEX;
}
}
defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasBWI] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
}
}
// Extension from i8 elements (v16i8 source register) to i32 elements.
// Instantiates avx512_extend_common once per destination width:
//   Z128 -> v4i32 result,  i32mem memory operand
//   Z256 -> v8i32 result,  i64mem memory operand
//   Z    -> v16i32 result, i128mem memory operand
// ExtTy is a string prefix concatenated with "extloadvi8" to select the
// default load fragment (LdFrag) used by the memory form.
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, SDNode OpNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
// 128-bit and 256-bit forms are additionally guarded by HasVLX.
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
v16i8x_info, i32mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
}
// 512-bit form only needs HasAVX512.
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
}
}
// Extension from i8 elements (v16i8 source register) to i64 elements.
// Instantiates avx512_extend_common once per destination width:
//   Z128 -> v2i64 result, i16mem memory operand
//   Z256 -> v4i64 result, i32mem memory operand
//   Z    -> v8i64 result, i64mem memory operand
// ExtTy is a string prefix concatenated with "extloadvi8" to select the
// default load fragment (LdFrag) used by the memory form.
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
// 128-bit and 256-bit forms are additionally guarded by HasVLX.
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v16i8x_info, i16mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v16i8x_info, i32mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
}
// 512-bit form only needs HasAVX512.
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
}
}
// Extension from i16 elements to i32 elements.
// Instantiates avx512_extend_common once per destination width:
//   Z128 -> v4i32 result from v8i16 source,   i64mem memory operand
//   Z256 -> v8i32 result from v8i16 source,   i128mem memory operand
//   Z    -> v16i32 result from v16i16 source, i256mem memory operand
// ExtTy is a string prefix concatenated with "extloadvi16" to select the
// default load fragment (LdFrag) used by the memory form.
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
// 128-bit and 256-bit forms are additionally guarded by HasVLX.
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
v8i16x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
}
// 512-bit form only needs HasAVX512.
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
}
}
// Extension from i16 elements (v8i16 source register) to i64 elements.
// Instantiates avx512_extend_common once per destination width:
//   Z128 -> v2i64 result, i32mem memory operand
//   Z256 -> v4i64 result, i64mem memory operand
//   Z    -> v8i64 result, i128mem memory operand
// ExtTy is a string prefix concatenated with "extloadvi16" to select the
// default load fragment (LdFrag) used by the memory form.
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
// 128-bit and 256-bit forms are additionally guarded by HasVLX.
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v8i16x_info, i32mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v8i16x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
}
// 512-bit form only needs HasAVX512.
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
}
}
// Extension from i32 elements to i64 elements.
// Instantiates avx512_extend_common once per destination width:
//   Z128 -> v2i64 result from v4i32 source, i64mem memory operand
//   Z256 -> v4i64 result from v4i32 source, i128mem memory operand
//   Z    -> v8i64 result from v8i32 source, i256mem memory operand
// ExtTy is a string prefix concatenated with "extloadvi32" to select the
// default load fragment (LdFrag) used by the memory form.
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
// 128-bit and 256-bit forms are additionally guarded by HasVLX.
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v4i32x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
}
// 512-bit form only needs HasAVX512.
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
}
// Zero-extend instruction families (opcodes 0x30-0x35). The X86vzext node is
// the DAG operator, and "z" selects the zextloadvi* load fragments.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
// Sign-extend instruction families (opcodes 0x20-0x25). The X86vsext node is
// the DAG operator, and "s" selects the sextloadvi* load fragments.
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

View File

@ -5850,10 +5850,10 @@ multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
OpndItins SSEItins, OpndItins AVXItins,
OpndItins AVX2Itins> {
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
let Predicates = [HasAVX] in
let Predicates = [HasAVX, NoVLX] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
VR128, VR128, AVXItins>, VEX;
let Predicates = [HasAVX2] in
let Predicates = [HasAVX2, NoVLX] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
VR256, VR128, AVX2Itins>, VEX, VEX_L;
}
@ -5988,7 +5988,7 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
let Predicates = [HasAVX2] in {
let Predicates = [HasAVX2, NoVLX] in {
defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}
@ -6087,7 +6087,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
let Predicates = [HasAVX] in {
let Predicates = [HasAVX, NoVLX] in {
defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}

View File

@ -1,95 +1,843 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
; CHECK-LABEL: trunc_16x32_to_16x8
; CHECK: vpmovdb
; CHECK: ret
; KNL-LABEL: trunc_16x32_to_16x8
; KNL: vpmovdb
; KNL: ret
; Truncates %i from <16 x i32> to <16 x i8>.
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
  %narrow = trunc <16 x i32> %i to <16 x i8>
  ret <16 x i8> %narrow
}
; CHECK-LABEL: trunc_8x64_to_8x16
; CHECK: vpmovqw
; CHECK: ret
; KNL-LABEL: trunc_8x64_to_8x16
; KNL: vpmovqw
; KNL: ret
; Truncates %i from <8 x i64> to <8 x i16>.
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
  %narrow = trunc <8 x i64> %i to <8 x i16>
  ret <8 x i16> %narrow
}
;SKX-LABEL: zext_8x8mem_to_8x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <8 x i8> -> <8 x i16>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = zext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}
; CHECK-LABEL: zext_16x8_to_16x32
; CHECK: vpmovzxbd {{.*}}%zmm
; CHECK: ret
;SKX-LABEL: sext_8x8mem_to_8x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <8 x i8> -> <8 x i16>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = sext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}
;SKX-LABEL: zext_16x8mem_to_16x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm0, %k1
;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <16 x i8> -> <16 x i16>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly {
  %a = load <16 x i8>, <16 x i8>* %i, align 1
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}
;SKX-LABEL: sext_16x8mem_to_16x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm0, %k1
;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <16 x i8> -> <16 x i16>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly {
  %a = load <16 x i8>, <16 x i8>* %i, align 1
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}
;SKX-LABEL: zext_16x8_to_16x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovzxbw %xmm0, %ymm0
;SKX-NEXT: retq
; Zero-extends %a from <16 x i8> to <16 x i16>.
define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
  %wide = zext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %wide
}
;SKX-LABEL: zext_16x8_to_16x16_mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm1, %k1
;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends %a from <16 x i8> to <16 x i16>, zeroing lanes where %mask is false.
define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
  %wide = zext <16 x i8> %a to <16 x i16>
  %masked = select <16 x i1> %mask, <16 x i16> %wide, <16 x i16> zeroinitializer
  ret <16 x i16> %masked
}
;SKX-LABEL: sext_16x8_to_16x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxbw %xmm0, %ymm0
;SKX-NEXT: retq
; Sign-extends %a from <16 x i8> to <16 x i16>.
define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
  %wide = sext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %wide
}
;SKX-LABEL: sext_16x8_to_16x16_mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm1, %k1
;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extends %a from <16 x i8> to <16 x i16>, zeroing lanes where %mask is false.
define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
  %wide = sext <16 x i8> %a to <16 x i16>
  %masked = select <16 x i1> %mask, <16 x i16> %wide, <16 x i16> zeroinitializer
  ret <16 x i16> %masked
}
;SKX-LABEL: zext_32x8mem_to_32x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %ymm0, %k1
;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <32 x i8> -> <32 x i16>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8>* %i, <32 x i1> %mask) nounwind readonly {
  %a = load <32 x i8>, <32 x i8>* %i, align 1
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
;SKX-LABEL: sext_32x8mem_to_32x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %ymm0, %k1
;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <32 x i8> -> <32 x i16>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8>* %i, <32 x i1> %mask) nounwind readonly {
  %a = load <32 x i8>, <32 x i8>* %i, align 1
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
;SKX-LABEL: zext_32x8_to_32x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovzxbw %ymm0, %zmm0
;SKX-NEXT: retq
; Zero-extends %a from <32 x i8> to <32 x i16>.
define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a) nounwind readnone {
  %wide = zext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %wide
}
;SKX-LABEL: zext_32x8_to_32x16_mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %ymm1, %k1
;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends %a from <32 x i8> to <32 x i16>, zeroing lanes where %mask is false.
define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a, <32 x i1> %mask) nounwind readnone {
  %wide = zext <32 x i8> %a to <32 x i16>
  %masked = select <32 x i1> %mask, <32 x i16> %wide, <32 x i16> zeroinitializer
  ret <32 x i16> %masked
}
;SKX-LABEL: sext_32x8_to_32x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxbw %ymm0, %zmm0
;SKX-NEXT: retq
; Sign-extends %a from <32 x i8> to <32 x i16>.
define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a) nounwind readnone {
  %wide = sext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %wide
}
;SKX-LABEL: sext_32x8_to_32x16_mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %ymm1, %k1
;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extends %a from <32 x i8> to <32 x i16>, zeroing lanes where %mask is false.
define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a, <32 x i1> %mask) nounwind readnone {
  %wide = sext <32 x i8> %a to <32 x i16>
  %masked = select <32 x i1> %mask, <32 x i16> %wide, <32 x i16> zeroinitializer
  ret <32 x i16> %masked
}
;SKX-LABEL: zext_4x8mem_to_4x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <4 x i8> -> <4 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i8>, <4 x i8>* %i, align 1
  %x = zext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
;SKX-LABEL: sext_4x8mem_to_4x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <4 x i8> -> <4 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i8>, <4 x i8>* %i, align 1
  %x = sext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
;SKX-LABEL: zext_8x8mem_to_8x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <8 x i8> -> <8 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = zext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
;SKX-LABEL: sext_8x8mem_to_8x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <8 x i8> -> <8 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = sext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
;KNL-LABEL: zext_16x8mem_to_16x32:
;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
;KNL-NEXT: retq
; Zero-extending load <16 x i8> -> <16 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly {
  %a = load <16 x i8>, <16 x i8>* %i, align 1
  %x = zext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
;KNL-LABEL: sext_16x8mem_to_16x32:
;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
;KNL-NEXT: retq
; Sign-extending load <16 x i8> -> <16 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly {
  %a = load <16 x i8>, <16 x i8>* %i, align 1
  %x = sext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
;KNL-LABEL: zext_16x8_to_16x32_mask:
;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
;KNL-NEXT: retq
; Zero-extends %a from <16 x i8> to <16 x i32>, zeroing lanes where %mask is false.
define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
  %wide = zext <16 x i8> %a to <16 x i32>
  %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer
  ret <16 x i32> %masked
}
;KNL-LABEL: sext_16x8_to_16x32_mask:
;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
;KNL-NEXT: retq
; Sign-extends %a from <16 x i8> to <16 x i32>, zeroing lanes where %mask is false.
define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
  %wide = sext <16 x i8> %a to <16 x i32>
  %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer
  ret <16 x i32> %masked
}
; KNL-LABEL: zext_16x8_to_16x32
; KNL: vpmovzxbd {{.*}}%zmm
; KNL: ret
; Zero-extends %i from <16 x i8> to <16 x i32>.
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
  %wide = zext <16 x i8> %i to <16 x i32>
  ret <16 x i32> %wide
}
; CHECK-LABEL: sext_16x8_to_16x32
; CHECK: vpmovsxbd {{.*}}%zmm
; CHECK: ret
; KNL-LABEL: sext_16x8_to_16x32
; KNL: vpmovsxbd {{.*}}%zmm
; KNL: ret
; Sign-extends %i from <16 x i8> to <16 x i32>.
define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
  %wide = sext <16 x i8> %i to <16 x i32>
  ret <16 x i32> %wide
}
; CHECK-LABEL: zext_16x16_to_16x32
; CHECK: vpmovzxwd {{.*}}%zmm
; CHECK: ret
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
%x = zext <16 x i16> %i to <16 x i32>
ret <16 x i32> %x
;SKX-LABEL: zext_2x8mem_to_2x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovq2m %xmm0, %k1
;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <2 x i8> -> <2 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8>* %i, <2 x i1> %mask) nounwind readonly {
  %a = load <2 x i8>, <2 x i8>* %i, align 1
  %x = zext <2 x i8> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
;SKX-LABEL: sext_2x8mem_to_2x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovq2m %xmm0, %k1
;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <2 x i8> -> <2 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8>* %i, <2 x i1> %mask) nounwind readonly {
  %a = load <2 x i8>, <2 x i8>* %i, align 1
  %x = sext <2 x i8> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
;SKX-LABEL: sext_2x8mem_to_2x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxbq (%rdi), %xmm0
;SKX-NEXT: retq
; Sign-extending load <2 x i8> -> <2 x i64>.
; readonly (not readnone): the body dereferences %i.
define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8>* %i) nounwind readonly {
  %a = load <2 x i8>, <2 x i8>* %i, align 1
  %x = sext <2 x i8> %a to <2 x i64>
  ret <2 x i64> %x
}
; CHECK-LABEL: zext_8x16_to_8x64
; CHECK: vpmovzxwq
; CHECK: ret
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
%x = zext <8 x i16> %i to <8 x i64>
;SKX-LABEL: zext_4x8mem_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <4 x i8> -> <4 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i8>, <4 x i8>* %i, align 1
  %x = zext <4 x i8> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
;SKX-LABEL: sext_4x8mem_to_4x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <4 x i8> -> <4 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i8>, <4 x i8>* %i, align 1
  %x = sext <4 x i8> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
;SKX-LABEL: sext_4x8mem_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxbq (%rdi), %ymm0
;SKX-NEXT: retq
; Sign-extending load <4 x i8> -> <4 x i64>.
; readonly (not readnone): the body dereferences %i.
define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8>* %i) nounwind readonly {
  %a = load <4 x i8>, <4 x i8>* %i, align 1
  %x = sext <4 x i8> %a to <4 x i64>
  ret <4 x i64> %x
}
;KNL-LABEL: zext_8x8mem_to_8x64:
;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
;KNL-NEXT: retq
; Zero-extending load <8 x i8> -> <8 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = zext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
;KNL-LABEL: sext_8x8mem_to_8x64mask:
;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
;KNL-NEXT: retq
; Sign-extending load <8 x i8> -> <8 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = sext <8 x i8> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
;KNL-LABEL: sext_8x8mem_to_8x64:
;KNL: vpmovsxbq (%rdi), %zmm0
;KNL-NEXT: retq
; Sign-extending load <8 x i8> -> <8 x i64>.
; readonly (not readnone): the body dereferences %i.
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8>* %i) nounwind readonly {
  %a = load <8 x i8>, <8 x i8>* %i, align 1
  %x = sext <8 x i8> %a to <8 x i64>
  ret <8 x i64> %x
}
;CHECK-LABEL: fptrunc_test
;CHECK: vcvtpd2ps {{.*}}%zmm
;CHECK: ret
;SKX-LABEL: zext_4x16mem_to_4x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <4 x i16> -> <4 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = zext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
;SKX-LABEL: sext_4x16mem_to_4x32mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <4 x i16> -> <4 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = sext <4 x i16> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}
;SKX-LABEL: sext_4x16mem_to_4x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxwd (%rdi), %xmm0
;SKX-NEXT: retq
; Sign-extending load <4 x i16> -> <4 x i32>.
; readonly (not readnone): the body dereferences %i.
define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16>* %i) nounwind readonly {
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %x
}
;SKX-LABEL: zext_8x16mem_to_8x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <8 x i16> -> <8 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = zext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
;SKX-LABEL: sext_8x16mem_to_8x32mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <8 x i16> -> <8 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}
;SKX-LABEL: sext_8x16mem_to_8x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxwd (%rdi), %ymm0
;SKX-NEXT: retq
; Sign-extending load <8 x i16> -> <8 x i32>.
; readonly (not readnone): the body dereferences %i.
define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16>* %i) nounwind readonly {
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %x
}
;SKX-LABEL: zext_8x16_to_8x32mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm1, %k1
;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends %a from <8 x i16> to <8 x i32>, zeroing lanes where %mask is false.
define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a, <8 x i1> %mask) nounwind readnone {
  %wide = zext <8 x i16> %a to <8 x i32>
  %masked = select <8 x i1> %mask, <8 x i32> %wide, <8 x i32> zeroinitializer
  ret <8 x i32> %masked
}
;SKX-LABEL: zext_8x16_to_8x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovzxwd %xmm0, %ymm0
;SKX-NEXT: retq
; Zero-extends %a from <8 x i16> to <8 x i32>.
define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a) nounwind readnone {
  %wide = zext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %wide
}
;SKX-LABEL: zext_16x16mem_to_16x32:
;KNL-LABEL: zext_16x16mem_to_16x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm0, %k1
;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <16 x i16> -> <16 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16>* %i, <16 x i1> %mask) nounwind readonly {
  %a = load <16 x i16>, <16 x i16>* %i, align 1
  %x = zext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
;SKX-LABEL: sext_16x16mem_to_16x32mask:
;KNL-LABEL: sext_16x16mem_to_16x32mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm0, %k1
;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <16 x i16> -> <16 x i32>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16>* %i, <16 x i1> %mask) nounwind readonly {
  %a = load <16 x i16>, <16 x i16>* %i, align 1
  %x = sext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
;SKX-LABEL: sext_16x16mem_to_16x32:
;KNL-LABEL: sext_16x16mem_to_16x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxwd (%rdi), %zmm0
;KNL: vpmovsxwd (%rdi), %zmm0
;SKX-NEXT: retq
; Sign-extending load <16 x i16> -> <16 x i32>.
; readonly (not readnone): the body dereferences %i.
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16>* %i) nounwind readonly {
  %a = load <16 x i16>, <16 x i16>* %i, align 1
  %x = sext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %x
}
;SKX-LABEL: zext_16x16_to_16x32mask:
;KNL-LABEL: zext_16x16_to_16x32mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovb2m %xmm1, %k1
;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends %a from <16 x i16> to <16 x i32>, zeroing lanes where %mask is false.
define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a, <16 x i1> %mask) nounwind readnone {
  %wide = zext <16 x i16> %a to <16 x i32>
  %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer
  ret <16 x i32> %masked
}
;SKX-LABEL: zext_16x16_to_16x32:
;KNL-LABEL: zext_16x16_to_16x32:
;SKX: ## BB#0:
;SKX-NEXT: vpmovzxwd %ymm0, %zmm0
;KNL: vpmovzxwd %ymm0, %zmm0
;SKX-NEXT: retq
; Zero-extends %a from <16 x i16> to <16 x i32>.
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a) nounwind readnone {
  %wide = zext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %wide
}
;SKX-LABEL: zext_2x16mem_to_2x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovq2m %xmm0, %k1
;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extending load <2 x i16> -> <2 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16>* %i, <2 x i1> %mask) nounwind readonly {
  %a = load <2 x i16>, <2 x i16>* %i, align 1
  %x = zext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
;SKX-LABEL: sext_2x16mem_to_2x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovq2m %xmm0, %k1
;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Sign-extending load <2 x i16> -> <2 x i64>, zeroing lanes where %mask is false.
; readonly (not readnone): the body dereferences %i.
define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16>* %i, <2 x i1> %mask) nounwind readonly {
  %a = load <2 x i16>, <2 x i16>* %i, align 1
  %x = sext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
;SKX-LABEL: sext_2x16mem_to_2x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxwq (%rdi), %xmm0
;SKX-NEXT: retq
; Sign-extending load <2 x i16> -> <2 x i64>.
; readonly (not readnone): the body dereferences %i.
define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16>* %i) nounwind readonly {
  %a = load <2 x i16>, <2 x i16>* %i, align 1
  %x = sext <2 x i16> %a to <2 x i64>
  ret <2 x i64> %x
}
;SKX-LABEL: zext_4x16mem_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <4 x i16> through %i (align 1), zero-extends it to <4 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = zext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
;SKX-LABEL: sext_4x16mem_to_4x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <4 x i16> through %i (align 1), sign-extends it to <4 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = sext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
;SKX-LABEL: sext_4x16mem_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxwq (%rdi), %ymm0
;SKX-NEXT: retq
; Loads <4 x i16> through %i (align 1) and sign-extends it to <4 x i64>.
; Marked readonly, not readnone, because the body reads memory.
define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16>* %i) nounwind readonly {
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = sext <4 x i16> %a to <4 x i64>
  ret <4 x i64> %x
}
;SKX-LABEL: zext_8x16mem_to_8x64:
;KNL-LABEL: zext_8x16mem_to_8x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <8 x i16> through %i (align 1), zero-extends it to <8 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
;SKX-LABEL: sext_8x16mem_to_8x64mask:
;KNL-LABEL: sext_8x16mem_to_8x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <8 x i16> through %i (align 1), sign-extends it to <8 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
;SKX-LABEL: sext_8x16mem_to_8x64:
;KNL-LABEL: sext_8x16mem_to_8x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxwq (%rdi), %zmm0
;KNL: vpmovsxwq (%rdi), %zmm0
;SKX-NEXT: retq
; Loads <8 x i16> through %i (align 1) and sign-extends it to <8 x i64>.
; Marked readonly, not readnone, because the body reads memory.
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16>* %i) nounwind readonly {
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %x
}
;SKX-LABEL: zext_8x16_to_8x64mask:
;KNL-LABEL: zext_8x16_to_8x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm1, %k1
;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends the register operand %a from <8 x i16> to <8 x i64>; every lane
; whose %mask bit is clear is replaced with zero.
define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a, <8 x i1> %mask) nounwind readnone {
  %wide = zext <8 x i16> %a to <8 x i64>
  %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer
  ret <8 x i64> %masked
}
;SKX-LABEL: zext_8x16_to_8x64:
;KNL-LABEL: zext_8x16_to_8x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovzxwq %xmm0, %zmm0
;KNL: vpmovzxwq %xmm0, %zmm0
;SKX-NEXT: retq
; KNL: ret
; Unmasked zero-extension of the register operand %a from <8 x i16> to
; <8 x i64>.
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
  %wide = zext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %wide
}
;SKX-LABEL: zext_2x32mem_to_2x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovq2m %xmm0, %k1
;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <2 x i32> through %i (align 1), zero-extends it to <2 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32>* %i, <2 x i1> %mask) nounwind readonly {
  %a = load <2 x i32>, <2 x i32>* %i, align 1
  %x = zext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
;SKX-LABEL: sext_2x32mem_to_2x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovq2m %xmm0, %k1
;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <2 x i32> through %i (align 1), sign-extends it to <2 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32>* %i, <2 x i1> %mask) nounwind readonly {
  %a = load <2 x i32>, <2 x i32>* %i, align 1
  %x = sext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
;SKX-LABEL: sext_2x32mem_to_2x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxdq (%rdi), %xmm0
;SKX-NEXT: retq
; Loads <2 x i32> through %i (align 1) and sign-extends it to <2 x i64>.
; Marked readonly, not readnone, because the body reads memory.
define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32>* %i) nounwind readonly {
  %a = load <2 x i32>, <2 x i32>* %i, align 1
  %x = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %x
}
;SKX-LABEL: zext_4x32mem_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <4 x i32> through %i (align 1), zero-extends it to <4 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i32>, <4 x i32>* %i, align 1
  %x = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
;SKX-LABEL: sext_4x32mem_to_4x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm0, %k1
;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <4 x i32> through %i (align 1), sign-extends it to <4 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32>* %i, <4 x i1> %mask) nounwind readonly {
  %a = load <4 x i32>, <4 x i32>* %i, align 1
  %x = sext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
;SKX-LABEL: sext_4x32mem_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxdq (%rdi), %ymm0
;SKX-NEXT: retq
; Loads <4 x i32> through %i (align 1) and sign-extends it to <4 x i64>.
; Marked readonly, not readnone, because the body reads memory.
define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32>* %i) nounwind readonly {
  %a = load <4 x i32>, <4 x i32>* %i, align 1
  %x = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}
;SKX-LABEL: sext_4x32_to_4x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxdq %xmm0, %ymm0
;SKX-NEXT: retq
; Unmasked sign-extension of the register operand %a from <4 x i32> to
; <4 x i64>.
define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
  %wide = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %wide
}
;SKX-LABEL: zext_4x32_to_4x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovd2m %xmm1, %k1
;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends the register operand %a from <4 x i32> to <4 x i64>; every lane
; whose %mask bit is clear is replaced with zero.
define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a, <4 x i1> %mask) nounwind readnone {
  %wide = zext <4 x i32> %a to <4 x i64>
  %masked = select <4 x i1> %mask, <4 x i64> %wide, <4 x i64> zeroinitializer
  ret <4 x i64> %masked
}
;SKX-LABEL: zext_8x32mem_to_8x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <8 x i32> through %i (align 1), zero-extends it to <8 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i32>, <8 x i32>* %i, align 1
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
;SKX-LABEL: sext_8x32mem_to_8x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Loads <8 x i32> through %i (align 1), sign-extends it to <8 x i64>, and zeroes
; every lane whose %mask bit is clear. Marked readonly, not readnone, because
; the body reads memory.
define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32>* %i, <8 x i1> %mask) nounwind readonly {
  %a = load <8 x i32>, <8 x i32>* %i, align 1
  %x = sext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
;SKX-LABEL: sext_8x32mem_to_8x64:
;KNL-LABEL: sext_8x32mem_to_8x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxdq (%rdi), %zmm0
;KNL: vpmovsxdq (%rdi), %zmm0
;SKX-NEXT: retq
; Loads <8 x i32> through %i (align 1) and sign-extends it to <8 x i64>.
; Marked readonly, not readnone, because the body reads memory.
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32>* %i) nounwind readonly {
  %a = load <8 x i32>, <8 x i32>* %i, align 1
  %x = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}
;SKX-LABEL: sext_8x32_to_8x64:
;KNL-LABEL: sext_8x32_to_8x64:
;SKX: ## BB#0:
;SKX-NEXT: vpmovsxdq %ymm0, %zmm0
;KNL: vpmovsxdq %ymm0, %zmm0
;SKX-NEXT: retq
; Unmasked sign-extension of the register operand %a from <8 x i32> to
; <8 x i64>.
define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
  %wide = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %wide
}
;SKX-LABEL: zext_8x32_to_8x64mask:
;KNL-LABEL: zext_8x32_to_8x64mask:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm1, %k1
;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
;SKX-NEXT: retq
; Zero-extends the register operand %a from <8 x i32> to <8 x i64>; every lane
; whose %mask bit is clear is replaced with zero.
define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a, <8 x i1> %mask) nounwind readnone {
  %wide = zext <8 x i32> %a to <8 x i64>
  %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer
  ret <8 x i64> %masked
}
;KNL-LABEL: fptrunc_test
;KNL: vcvtpd2ps {{.*}}%zmm
;KNL: ret
; Narrows each of the eight double lanes of %a to float.
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
  %narrow = fptrunc <8 x double> %a to <8 x float>
  ret <8 x float> %narrow
}
;CHECK-LABEL: fpext_test
;CHECK: vcvtps2pd {{.*}}%zmm
;CHECK: ret
;KNL-LABEL: fpext_test
;KNL: vcvtps2pd {{.*}}%zmm
;KNL: ret
; Widens each of the eight float lanes of %a to double.
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
  %wide = fpext <8 x float> %a to <8 x double>
  ret <8 x double> %wide
}
; CHECK-LABEL: zext_16i1_to_16xi32
; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK: ret
; KNL-LABEL: zext_16i1_to_16xi32
; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; KNL: ret
; Reinterprets the scalar %b as sixteen i1 lanes and zero-extends each lane to
; i32.
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
  %bits = bitcast i16 %b to <16 x i1>
  %wide = zext <16 x i1> %bits to <16 x i32>
  ret <16 x i32> %wide
}
; CHECK-LABEL: zext_8i1_to_8xi64
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK: ret
; KNL-LABEL: zext_8i1_to_8xi64
; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; KNL: ret
; Reinterprets the scalar %b as eight i1 lanes and zero-extends each lane to
; i64.
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
  %bits = bitcast i8 %b to <8 x i1>
  %wide = zext <8 x i1> %bits to <8 x i64>
  ret <8 x i64> %wide
}
; CHECK-LABEL: trunc_16i8_to_16i1
; CHECK: vpmovsxbd
; CHECK: vpandd
; CHECK: vptestmd
; CHECK: ret
; KNL-LABEL: trunc_16i8_to_16i1
; KNL: vpmovsxbd
; KNL: vpandd
; KNL: vptestmd
; KNL: ret
; SKX-LABEL: trunc_16i8_to_16i1
; SKX: vpmovb2m %xmm
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
@ -98,10 +846,10 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
ret i16 %mask
}
; CHECK-LABEL: trunc_16i32_to_16i1
; CHECK: vpandd
; CHECK: vptestmd
; CHECK: ret
; KNL-LABEL: trunc_16i32_to_16i1
; KNL: vpandd
; KNL: vptestmd
; KNL: ret
; SKX-LABEL: trunc_16i32_to_16i1
; SKX: vpmovd2m %zmm
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
@ -122,11 +870,11 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32>%res
}
; CHECK-LABEL: trunc_8i16_to_8i1
; CHECK: vpmovsxwq
; CHECK: vpandq LCP{{.*}}(%rip){1to8}
; CHECK: vptestmq
; CHECK: ret
; KNL-LABEL: trunc_8i16_to_8i1
; KNL: vpmovsxwq
; KNL: vpandq LCP{{.*}}(%rip){1to8}
; KNL: vptestmq
; KNL: ret
; SKX-LABEL: trunc_8i16_to_8i1
; SKX: vpmovw2m %xmm
@ -136,10 +884,10 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
ret i8 %mask
}
; CHECK-LABEL: sext_8i1_8i32
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-LABEL: sext_8i1_8i32
; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; SKX: vpmovm2d
; CHECK: ret
; KNL: ret
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
@ -147,18 +895,18 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
ret <8 x i32> %y
}
; CHECK-LABEL: trunc_v16i32_to_v16i16
; CHECK: vpmovdw
; CHECK: ret
; KNL-LABEL: trunc_v16i32_to_v16i16
; KNL: vpmovdw
; KNL: ret
; Truncates each of the sixteen i32 lanes of %x to i16.
define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
  %narrow = trunc <16 x i32> %x to <16 x i16>
  ret <16 x i16> %narrow
}
; CHECK-LABEL: trunc_i32_to_i1
; CHECK: movw $-4, %ax
; CHECK: kmovw %eax, %k1
; CHECK: korw
; KNL-LABEL: trunc_i32_to_i1
; KNL: movw $-4, %ax
; KNL: kmovw %eax, %k1
; KNL: korw
define i16 @trunc_i32_to_i1(i32 %a) {
%a_i = trunc i32 %a to i1
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
@ -166,35 +914,35 @@ define i16 @trunc_i32_to_i1(i32 %a) {
ret i16 %res
}
; CHECK-LABEL: sext_8i1_8i16
; KNL-LABEL: sext_8i1_8i16
; SKX: vpmovm2w
; CHECK: ret
; KNL: ret
; Lane-wise signed "less than" of %a1 and %a2, with the <8 x i1> result
; sign-extended to <8 x i16>.
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
  %lt = icmp slt <8 x i32> %a1, %a2
  %ext = sext <8 x i1> %lt to <8 x i16>
  ret <8 x i16> %ext
}
; CHECK-LABEL: sext_16i1_16i32
; KNL-LABEL: sext_16i1_16i32
; SKX: vpmovm2d
; CHECK: ret
; KNL: ret
; Lane-wise signed "less than" of %a1 and %a2, with the <16 x i1> result
; sign-extended to <16 x i32>.
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
  %lt = icmp slt <16 x i32> %a1, %a2
  %ext = sext <16 x i1> %lt to <16 x i32>
  ret <16 x i32> %ext
}
; CHECK-LABEL: sext_8i1_8i64
; KNL-LABEL: sext_8i1_8i64
; SKX: vpmovm2q
; CHECK: ret
; KNL: ret
; Lane-wise signed "less than" of %a1 and %a2, with the <8 x i1> result
; sign-extended to <8 x i64>.
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
  %lt = icmp slt <8 x i32> %a1, %a2
  %ext = sext <8 x i1> %lt to <8 x i64>
  ret <8 x i64> %ext
}
; CHECK-LABEL: @extload_v8i64
; CHECK: vpmovsxbq
; KNL-LABEL: @extload_v8i64
; KNL: vpmovsxbq
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
%sign_load = load <8 x i8>, <8 x i8>* %a
%c = sext <8 x i8> %sign_load to <8 x i64>

View File

@ -511,6 +511,78 @@
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x3a,0x9a,0xc0,0xdf,0xff,0xff]
vpminuw -8256(%rdx), %zmm29, %zmm19
// CHECK: vpmovsxbw %ymm18, %zmm22
// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x20,0xf2]
vpmovsxbw %ymm18, %zmm22
// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5}
// CHECK: encoding: [0x62,0xa2,0x7d,0x4d,0x20,0xf2]
vpmovsxbw %ymm18, %zmm22 {%k5}
// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0xcd,0x20,0xf2]
vpmovsxbw %ymm18, %zmm22 {%k5} {z}
// CHECK: vpmovsxbw (%rcx), %zmm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x31]
vpmovsxbw (%rcx), %zmm22
// CHECK: vpmovsxbw 291(%rax,%r14,8), %zmm22
// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x20,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbw 291(%rax,%r14,8), %zmm22
// CHECK: vpmovsxbw 4064(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x7f]
vpmovsxbw 4064(%rdx), %zmm22
// CHECK: vpmovsxbw 4096(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0x00,0x10,0x00,0x00]
vpmovsxbw 4096(%rdx), %zmm22
// CHECK: vpmovsxbw -4096(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x80]
vpmovsxbw -4096(%rdx), %zmm22
// CHECK: vpmovsxbw -4128(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0xe0,0xef,0xff,0xff]
vpmovsxbw -4128(%rdx), %zmm22
// CHECK: vpmovzxbw %ymm26, %zmm24
// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x30,0xc2]
vpmovzxbw %ymm26, %zmm24
// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4}
// CHECK: encoding: [0x62,0x02,0x7d,0x4c,0x30,0xc2]
vpmovzxbw %ymm26, %zmm24 {%k4}
// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0xcc,0x30,0xc2]
vpmovzxbw %ymm26, %zmm24 {%k4} {z}
// CHECK: vpmovzxbw (%rcx), %zmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x01]
vpmovzxbw (%rcx), %zmm24
// CHECK: vpmovzxbw 291(%rax,%r14,8), %zmm24
// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x30,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbw 291(%rax,%r14,8), %zmm24
// CHECK: vpmovzxbw 4064(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x7f]
vpmovzxbw 4064(%rdx), %zmm24
// CHECK: vpmovzxbw 4096(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0x00,0x10,0x00,0x00]
vpmovzxbw 4096(%rdx), %zmm24
// CHECK: vpmovzxbw -4096(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x80]
vpmovzxbw -4096(%rdx), %zmm24
// CHECK: vpmovzxbw -4128(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0xe0,0xef,0xff,0xff]
vpmovzxbw -4128(%rdx), %zmm24
// CHECK: vpmullw %zmm19, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd5,0xdb]
vpmullw %zmm19, %zmm28, %zmm19

View File

@ -1312,6 +1312,150 @@
// CHECK: encoding: [0x62,0xe2,0x25,0x20,0x3a,0xa2,0xe0,0xef,0xff,0xff]
vpminuw -4128(%rdx), %ymm27, %ymm20
// CHECK: vpmovsxbw %xmm23, %xmm27
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x20,0xdf]
vpmovsxbw %xmm23, %xmm27
// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7}
// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x20,0xdf]
vpmovsxbw %xmm23, %xmm27 {%k7}
// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x20,0xdf]
vpmovsxbw %xmm23, %xmm27 {%k7} {z}
// CHECK: vpmovsxbw (%rcx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x19]
vpmovsxbw (%rcx), %xmm27
// CHECK: vpmovsxbw 291(%rax,%r14,8), %xmm27
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbw 291(%rax,%r14,8), %xmm27
// CHECK: vpmovsxbw 1016(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x7f]
vpmovsxbw 1016(%rdx), %xmm27
// CHECK: vpmovsxbw 1024(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0x00,0x04,0x00,0x00]
vpmovsxbw 1024(%rdx), %xmm27
// CHECK: vpmovsxbw -1024(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x80]
vpmovsxbw -1024(%rdx), %xmm27
// CHECK: vpmovsxbw -1032(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0xf8,0xfb,0xff,0xff]
vpmovsxbw -1032(%rdx), %xmm27
// CHECK: vpmovsxbw %xmm23, %ymm21
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x20,0xef]
vpmovsxbw %xmm23, %ymm21
// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7}
// CHECK: encoding: [0x62,0xa2,0x7d,0x2f,0x20,0xef]
vpmovsxbw %xmm23, %ymm21 {%k7}
// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0xaf,0x20,0xef]
vpmovsxbw %xmm23, %ymm21 {%k7} {z}
// CHECK: vpmovsxbw (%rcx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x29]
vpmovsxbw (%rcx), %ymm21
// CHECK: vpmovsxbw 291(%rax,%r14,8), %ymm21
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x20,0xac,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbw 291(%rax,%r14,8), %ymm21
// CHECK: vpmovsxbw 2032(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x7f]
vpmovsxbw 2032(%rdx), %ymm21
// CHECK: vpmovsxbw 2048(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0x00,0x08,0x00,0x00]
vpmovsxbw 2048(%rdx), %ymm21
// CHECK: vpmovsxbw -2048(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x80]
vpmovsxbw -2048(%rdx), %ymm21
// CHECK: vpmovsxbw -2064(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0xf0,0xf7,0xff,0xff]
vpmovsxbw -2064(%rdx), %ymm21
// CHECK: vpmovzxbw %xmm29, %xmm30
// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x30,0xf5]
vpmovzxbw %xmm29, %xmm30
// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7}
// CHECK: encoding: [0x62,0x02,0x7d,0x0f,0x30,0xf5]
vpmovzxbw %xmm29, %xmm30 {%k7}
// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0x8f,0x30,0xf5]
vpmovzxbw %xmm29, %xmm30 {%k7} {z}
// CHECK: vpmovzxbw (%rcx), %xmm30
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x31]
vpmovzxbw (%rcx), %xmm30
// CHECK: vpmovzxbw 291(%rax,%r14,8), %xmm30
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbw 291(%rax,%r14,8), %xmm30
// CHECK: vpmovzxbw 1016(%rdx), %xmm30
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x7f]
vpmovzxbw 1016(%rdx), %xmm30
// CHECK: vpmovzxbw 1024(%rdx), %xmm30
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0x00,0x04,0x00,0x00]
vpmovzxbw 1024(%rdx), %xmm30
// CHECK: vpmovzxbw -1024(%rdx), %xmm30
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x80]
vpmovzxbw -1024(%rdx), %xmm30
// CHECK: vpmovzxbw -1032(%rdx), %xmm30
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0xf8,0xfb,0xff,0xff]
vpmovzxbw -1032(%rdx), %xmm30
// CHECK: vpmovzxbw %xmm29, %ymm22
// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x30,0xf5]
vpmovzxbw %xmm29, %ymm22
// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2}
// CHECK: encoding: [0x62,0x82,0x7d,0x2a,0x30,0xf5]
vpmovzxbw %xmm29, %ymm22 {%k2}
// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2} {z}
// CHECK: encoding: [0x62,0x82,0x7d,0xaa,0x30,0xf5]
vpmovzxbw %xmm29, %ymm22 {%k2} {z}
// CHECK: vpmovzxbw (%rcx), %ymm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x31]
vpmovzxbw (%rcx), %ymm22
// CHECK: vpmovzxbw 291(%rax,%r14,8), %ymm22
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbw 291(%rax,%r14,8), %ymm22
// CHECK: vpmovzxbw 2032(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x7f]
vpmovzxbw 2032(%rdx), %ymm22
// CHECK: vpmovzxbw 2048(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0x00,0x08,0x00,0x00]
vpmovzxbw 2048(%rdx), %ymm22
// CHECK: vpmovzxbw -2048(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x80]
vpmovzxbw -2048(%rdx), %ymm22
// CHECK: vpmovzxbw -2064(%rdx), %ymm22
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0xf0,0xf7,0xff,0xff]
vpmovzxbw -2064(%rdx), %ymm22
// CHECK: vpmullw %xmm26, %xmm19, %xmm29
// CHECK: encoding: [0x62,0x01,0x65,0x00,0xd5,0xea]
vpmullw %xmm26, %xmm19, %xmm29

View File

@ -4524,6 +4524,726 @@
// CHECK: encoding: [0x62,0x62,0xd5,0x30,0x3b,0xaa,0xf8,0xfb,0xff,0xff]
vpminuq -1032(%rdx){1to4}, %ymm21, %ymm29
// CHECK: vpmovsxbd %xmm28, %xmm24
// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x21,0xc4]
vpmovsxbd %xmm28, %xmm24
// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x09,0x21,0xc4]
vpmovsxbd %xmm28, %xmm24 {%k1}
// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0x89,0x21,0xc4]
vpmovsxbd %xmm28, %xmm24 {%k1} {z}
// CHECK: vpmovsxbd (%rcx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x01]
vpmovsxbd (%rcx), %xmm24
// CHECK: vpmovsxbd 291(%rax,%r14,8), %xmm24
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x21,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbd 291(%rax,%r14,8), %xmm24
// CHECK: vpmovsxbd 508(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x7f]
vpmovsxbd 508(%rdx), %xmm24
// CHECK: vpmovsxbd 512(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0x00,0x02,0x00,0x00]
vpmovsxbd 512(%rdx), %xmm24
// CHECK: vpmovsxbd -512(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x80]
vpmovsxbd -512(%rdx), %xmm24
// CHECK: vpmovsxbd -516(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0xfc,0xfd,0xff,0xff]
vpmovsxbd -516(%rdx), %xmm24
// CHECK: vpmovsxbd %xmm20, %ymm24
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x21,0xc4]
vpmovsxbd %xmm20, %ymm24
// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3}
// CHECK: encoding: [0x62,0x22,0x7d,0x2b,0x21,0xc4]
vpmovsxbd %xmm20, %ymm24 {%k3}
// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0xab,0x21,0xc4]
vpmovsxbd %xmm20, %ymm24 {%k3} {z}
// CHECK: vpmovsxbd (%rcx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x01]
vpmovsxbd (%rcx), %ymm24
// CHECK: vpmovsxbd 291(%rax,%r14,8), %ymm24
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x21,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbd 291(%rax,%r14,8), %ymm24
// CHECK: vpmovsxbd 1016(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x7f]
vpmovsxbd 1016(%rdx), %ymm24
// CHECK: vpmovsxbd 1024(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0x00,0x04,0x00,0x00]
vpmovsxbd 1024(%rdx), %ymm24
// CHECK: vpmovsxbd -1024(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x80]
vpmovsxbd -1024(%rdx), %ymm24
// CHECK: vpmovsxbd -1032(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0xf8,0xfb,0xff,0xff]
vpmovsxbd -1032(%rdx), %ymm24
// CHECK: vpmovsxbq %xmm22, %xmm17
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x22,0xce]
vpmovsxbq %xmm22, %xmm17
// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5}
// CHECK: encoding: [0x62,0xa2,0x7d,0x0d,0x22,0xce]
vpmovsxbq %xmm22, %xmm17 {%k5}
// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0x8d,0x22,0xce]
vpmovsxbq %xmm22, %xmm17 {%k5} {z}
// CHECK: vpmovsxbq (%rcx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x09]
vpmovsxbq (%rcx), %xmm17
// CHECK: vpmovsxbq 291(%rax,%r14,8), %xmm17
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbq 291(%rax,%r14,8), %xmm17
// CHECK: vpmovsxbq 254(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x7f]
vpmovsxbq 254(%rdx), %xmm17
// CHECK: vpmovsxbq 256(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0x00,0x01,0x00,0x00]
vpmovsxbq 256(%rdx), %xmm17
// CHECK: vpmovsxbq -256(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x80]
vpmovsxbq -256(%rdx), %xmm17
// CHECK: vpmovsxbq -258(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff]
vpmovsxbq -258(%rdx), %xmm17
// CHECK: vpmovsxbq %xmm26, %ymm28
// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x22,0xe2]
vpmovsxbq %xmm26, %ymm28
// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5}
// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x22,0xe2]
vpmovsxbq %xmm26, %ymm28 {%k5}
// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x22,0xe2]
vpmovsxbq %xmm26, %ymm28 {%k5} {z}
// CHECK: vpmovsxbq (%rcx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x21]
vpmovsxbq (%rcx), %ymm28
// CHECK: vpmovsxbq 291(%rax,%r14,8), %ymm28
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x22,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpmovsxbq 291(%rax,%r14,8), %ymm28
// CHECK: vpmovsxbq 508(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x7f]
vpmovsxbq 508(%rdx), %ymm28
// CHECK: vpmovsxbq 512(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0x00,0x02,0x00,0x00]
vpmovsxbq 512(%rdx), %ymm28
// CHECK: vpmovsxbq -512(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x80]
vpmovsxbq -512(%rdx), %ymm28
// CHECK: vpmovsxbq -516(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0xfc,0xfd,0xff,0xff]
vpmovsxbq -516(%rdx), %ymm28
// CHECK: vpmovsxdq %xmm26, %xmm23
// CHECK: encoding: [0x62,0x82,0x7d,0x08,0x25,0xfa]
vpmovsxdq %xmm26, %xmm23
// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7}
// CHECK: encoding: [0x62,0x82,0x7d,0x0f,0x25,0xfa]
vpmovsxdq %xmm26, %xmm23 {%k7}
// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7} {z}
// CHECK: encoding: [0x62,0x82,0x7d,0x8f,0x25,0xfa]
vpmovsxdq %xmm26, %xmm23 {%k7} {z}
// CHECK: vpmovsxdq (%rcx), %xmm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x39]
vpmovsxdq (%rcx), %xmm23
// CHECK: vpmovsxdq 291(%rax,%r14,8), %xmm23
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00]
vpmovsxdq 291(%rax,%r14,8), %xmm23
// CHECK: vpmovsxdq 1016(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x7f]
vpmovsxdq 1016(%rdx), %xmm23
// CHECK: vpmovsxdq 1024(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0x00,0x04,0x00,0x00]
vpmovsxdq 1024(%rdx), %xmm23
// CHECK: vpmovsxdq -1024(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x80]
vpmovsxdq -1024(%rdx), %xmm23
// CHECK: vpmovsxdq -1032(%rdx), %xmm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0xf8,0xfb,0xff,0xff]
vpmovsxdq -1032(%rdx), %xmm23
// CHECK: vpmovsxdq %xmm28, %ymm18
// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x25,0xd4]
vpmovsxdq %xmm28, %ymm18
// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7}
// CHECK: encoding: [0x62,0x82,0x7d,0x2f,0x25,0xd4]
vpmovsxdq %xmm28, %ymm18 {%k7}
// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7} {z}
// CHECK: encoding: [0x62,0x82,0x7d,0xaf,0x25,0xd4]
vpmovsxdq %xmm28, %ymm18 {%k7} {z}
// CHECK: vpmovsxdq (%rcx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x11]
vpmovsxdq (%rcx), %ymm18
// CHECK: vpmovsxdq 291(%rax,%r14,8), %ymm18
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x25,0x94,0xf0,0x23,0x01,0x00,0x00]
vpmovsxdq 291(%rax,%r14,8), %ymm18
// CHECK: vpmovsxdq 2032(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x7f]
vpmovsxdq 2032(%rdx), %ymm18
// CHECK: vpmovsxdq 2048(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0x00,0x08,0x00,0x00]
vpmovsxdq 2048(%rdx), %ymm18
// CHECK: vpmovsxdq -2048(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x80]
vpmovsxdq -2048(%rdx), %ymm18
// CHECK: vpmovsxdq -2064(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0xf0,0xf7,0xff,0xff]
vpmovsxdq -2064(%rdx), %ymm18
// CHECK: vpmovsxwd %xmm18, %xmm17
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x23,0xca]
vpmovsxwd %xmm18, %xmm17
// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4}
// CHECK: encoding: [0x62,0xa2,0x7d,0x0c,0x23,0xca]
vpmovsxwd %xmm18, %xmm17 {%k4}
// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0x8c,0x23,0xca]
vpmovsxwd %xmm18, %xmm17 {%k4} {z}
// CHECK: vpmovsxwd (%rcx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x09]
vpmovsxwd (%rcx), %xmm17
// CHECK: vpmovsxwd 291(%rax,%r14,8), %xmm17
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x23,0x8c,0xf0,0x23,0x01,0x00,0x00]
vpmovsxwd 291(%rax,%r14,8), %xmm17
// CHECK: vpmovsxwd 1016(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x7f]
vpmovsxwd 1016(%rdx), %xmm17
// CHECK: vpmovsxwd 1024(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0x00,0x04,0x00,0x00]
vpmovsxwd 1024(%rdx), %xmm17
// CHECK: vpmovsxwd -1024(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x80]
vpmovsxwd -1024(%rdx), %xmm17
// CHECK: vpmovsxwd -1032(%rdx), %xmm17
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0xf8,0xfb,0xff,0xff]
vpmovsxwd -1032(%rdx), %xmm17
// CHECK: vpmovsxwd %xmm25, %ymm21
// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x23,0xe9]
vpmovsxwd %xmm25, %ymm21
// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5}
// CHECK: encoding: [0x62,0x82,0x7d,0x2d,0x23,0xe9]
vpmovsxwd %xmm25, %ymm21 {%k5}
// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5} {z}
// CHECK: encoding: [0x62,0x82,0x7d,0xad,0x23,0xe9]
vpmovsxwd %xmm25, %ymm21 {%k5} {z}
// CHECK: vpmovsxwd (%rcx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x29]
vpmovsxwd (%rcx), %ymm21
// CHECK: vpmovsxwd 291(%rax,%r14,8), %ymm21
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x23,0xac,0xf0,0x23,0x01,0x00,0x00]
vpmovsxwd 291(%rax,%r14,8), %ymm21
// CHECK: vpmovsxwd 2032(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x7f]
vpmovsxwd 2032(%rdx), %ymm21
// CHECK: vpmovsxwd 2048(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0x00,0x08,0x00,0x00]
vpmovsxwd 2048(%rdx), %ymm21
// CHECK: vpmovsxwd -2048(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x80]
vpmovsxwd -2048(%rdx), %ymm21
// CHECK: vpmovsxwd -2064(%rdx), %ymm21
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0xf0,0xf7,0xff,0xff]
vpmovsxwd -2064(%rdx), %ymm21
// CHECK: vpmovsxwq %xmm20, %xmm29
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x24,0xec]
vpmovsxwq %xmm20, %xmm29
// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6}
// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x24,0xec]
vpmovsxwq %xmm20, %xmm29 {%k6}
// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x24,0xec]
vpmovsxwq %xmm20, %xmm29 {%k6} {z}
// CHECK: vpmovsxwq (%rcx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x29]
vpmovsxwq (%rcx), %xmm29
// CHECK: vpmovsxwq 291(%rax,%r14,8), %xmm29
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x24,0xac,0xf0,0x23,0x01,0x00,0x00]
vpmovsxwq 291(%rax,%r14,8), %xmm29
// CHECK: vpmovsxwq 508(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x7f]
vpmovsxwq 508(%rdx), %xmm29
// CHECK: vpmovsxwq 512(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0x00,0x02,0x00,0x00]
vpmovsxwq 512(%rdx), %xmm29
// CHECK: vpmovsxwq -512(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x80]
vpmovsxwq -512(%rdx), %xmm29
// CHECK: vpmovsxwq -516(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0xfc,0xfd,0xff,0xff]
vpmovsxwq -516(%rdx), %xmm29
// CHECK: vpmovsxwq %xmm17, %ymm23
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x24,0xf9]
vpmovsxwq %xmm17, %ymm23
// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5}
// CHECK: encoding: [0x62,0xa2,0x7d,0x2d,0x24,0xf9]
vpmovsxwq %xmm17, %ymm23 {%k5}
// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0xad,0x24,0xf9]
vpmovsxwq %xmm17, %ymm23 {%k5} {z}
// CHECK: vpmovsxwq (%rcx), %ymm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x39]
vpmovsxwq (%rcx), %ymm23
// CHECK: vpmovsxwq 291(%rax,%r14,8), %ymm23
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x24,0xbc,0xf0,0x23,0x01,0x00,0x00]
vpmovsxwq 291(%rax,%r14,8), %ymm23
// CHECK: vpmovsxwq 1016(%rdx), %ymm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x7f]
vpmovsxwq 1016(%rdx), %ymm23
// CHECK: vpmovsxwq 1024(%rdx), %ymm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0x00,0x04,0x00,0x00]
vpmovsxwq 1024(%rdx), %ymm23
// CHECK: vpmovsxwq -1024(%rdx), %ymm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x80]
vpmovsxwq -1024(%rdx), %ymm23
// CHECK: vpmovsxwq -1032(%rdx), %ymm23
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0xf8,0xfb,0xff,0xff]
vpmovsxwq -1032(%rdx), %ymm23
// vpmovzxbd into %xmm24: register source (%xmm17) unmasked, with {%k6}, and
// with {%k6} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 508 and -512 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 4), while 512 and -516
// are emitted as a full 4-byte displacement.
// CHECK: vpmovzxbd %xmm17, %xmm24
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x31,0xc1]
vpmovzxbd %xmm17, %xmm24
// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6}
// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x31,0xc1]
vpmovzxbd %xmm17, %xmm24 {%k6}
// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x31,0xc1]
vpmovzxbd %xmm17, %xmm24 {%k6} {z}
// CHECK: vpmovzxbd (%rcx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x01]
vpmovzxbd (%rcx), %xmm24
// CHECK: vpmovzxbd 291(%rax,%r14,8), %xmm24
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x31,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbd 291(%rax,%r14,8), %xmm24
// CHECK: vpmovzxbd 508(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x7f]
vpmovzxbd 508(%rdx), %xmm24
// CHECK: vpmovzxbd 512(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0x00,0x02,0x00,0x00]
vpmovzxbd 512(%rdx), %xmm24
// CHECK: vpmovzxbd -512(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x80]
vpmovzxbd -512(%rdx), %xmm24
// CHECK: vpmovzxbd -516(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0xfc,0xfd,0xff,0xff]
vpmovzxbd -516(%rdx), %xmm24
// vpmovzxbd into %ymm27: register source (%xmm17) unmasked, with {%k1}, and
// with {%k1} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 1016 and -1024 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 8), while 1024 and
// -1032 are emitted as a full 4-byte displacement.
// CHECK: vpmovzxbd %xmm17, %ymm27
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x31,0xd9]
vpmovzxbd %xmm17, %ymm27
// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x29,0x31,0xd9]
vpmovzxbd %xmm17, %ymm27 {%k1}
// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0xa9,0x31,0xd9]
vpmovzxbd %xmm17, %ymm27 {%k1} {z}
// CHECK: vpmovzxbd (%rcx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x19]
vpmovzxbd (%rcx), %ymm27
// CHECK: vpmovzxbd 291(%rax,%r14,8), %ymm27
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x31,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbd 291(%rax,%r14,8), %ymm27
// CHECK: vpmovzxbd 1016(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x7f]
vpmovzxbd 1016(%rdx), %ymm27
// CHECK: vpmovzxbd 1024(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0x00,0x04,0x00,0x00]
vpmovzxbd 1024(%rdx), %ymm27
// CHECK: vpmovzxbd -1024(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x80]
vpmovzxbd -1024(%rdx), %ymm27
// CHECK: vpmovzxbd -1032(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0xf8,0xfb,0xff,0xff]
vpmovzxbd -1032(%rdx), %ymm27
// vpmovzxbq into %xmm19: register source (also %xmm19) unmasked, with {%k1},
// and with {%k1} {z}; then memory sources (base only, base+index*8+disp, and
// four displacement boundary cases).
// In the expected bytes, 254 and -256 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 2), while 256 and -258
// are emitted as a full 4-byte displacement.
// CHECK: vpmovzxbq %xmm19, %xmm19
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x32,0xdb]
vpmovzxbq %xmm19, %xmm19
// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x09,0x32,0xdb]
vpmovzxbq %xmm19, %xmm19 {%k1}
// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0x89,0x32,0xdb]
vpmovzxbq %xmm19, %xmm19 {%k1} {z}
// CHECK: vpmovzxbq (%rcx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x19]
vpmovzxbq (%rcx), %xmm19
// CHECK: vpmovzxbq 291(%rax,%r14,8), %xmm19
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbq 291(%rax,%r14,8), %xmm19
// CHECK: vpmovzxbq 254(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x7f]
vpmovzxbq 254(%rdx), %xmm19
// CHECK: vpmovzxbq 256(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0x00,0x01,0x00,0x00]
vpmovzxbq 256(%rdx), %xmm19
// CHECK: vpmovzxbq -256(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x80]
vpmovzxbq -256(%rdx), %xmm19
// CHECK: vpmovzxbq -258(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff]
vpmovzxbq -258(%rdx), %xmm19
// vpmovzxbq into %ymm24: register source (%xmm19) unmasked, with {%k2}, and
// with {%k2} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 508 and -512 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 4), while 512 and -516
// are emitted as a full 4-byte displacement.
// CHECK: vpmovzxbq %xmm19, %ymm24
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x32,0xc3]
vpmovzxbq %xmm19, %ymm24
// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2}
// CHECK: encoding: [0x62,0x22,0x7d,0x2a,0x32,0xc3]
vpmovzxbq %xmm19, %ymm24 {%k2}
// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0xaa,0x32,0xc3]
vpmovzxbq %xmm19, %ymm24 {%k2} {z}
// CHECK: vpmovzxbq (%rcx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x01]
vpmovzxbq (%rcx), %ymm24
// CHECK: vpmovzxbq 291(%rax,%r14,8), %ymm24
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x32,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovzxbq 291(%rax,%r14,8), %ymm24
// CHECK: vpmovzxbq 508(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x7f]
vpmovzxbq 508(%rdx), %ymm24
// CHECK: vpmovzxbq 512(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0x00,0x02,0x00,0x00]
vpmovzxbq 512(%rdx), %ymm24
// CHECK: vpmovzxbq -512(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x80]
vpmovzxbq -512(%rdx), %ymm24
// CHECK: vpmovzxbq -516(%rdx), %ymm24
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0xfc,0xfd,0xff,0xff]
vpmovzxbq -516(%rdx), %ymm24
// vpmovzxdq into %xmm25: register source (%xmm21) unmasked, with {%k7}, and
// with {%k7} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 1016 and -1024 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 8), while 1024 and
// -1032 are emitted as a full 4-byte displacement.
// CHECK: vpmovzxdq %xmm21, %xmm25
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x35,0xcd]
vpmovzxdq %xmm21, %xmm25
// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7}
// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x35,0xcd]
vpmovzxdq %xmm21, %xmm25 {%k7}
// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x35,0xcd]
vpmovzxdq %xmm21, %xmm25 {%k7} {z}
// CHECK: vpmovzxdq (%rcx), %xmm25
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x09]
vpmovzxdq (%rcx), %xmm25
// CHECK: vpmovzxdq 291(%rax,%r14,8), %xmm25
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x35,0x8c,0xf0,0x23,0x01,0x00,0x00]
vpmovzxdq 291(%rax,%r14,8), %xmm25
// CHECK: vpmovzxdq 1016(%rdx), %xmm25
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x7f]
vpmovzxdq 1016(%rdx), %xmm25
// CHECK: vpmovzxdq 1024(%rdx), %xmm25
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0x00,0x04,0x00,0x00]
vpmovzxdq 1024(%rdx), %xmm25
// CHECK: vpmovzxdq -1024(%rdx), %xmm25
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x80]
vpmovzxdq -1024(%rdx), %xmm25
// CHECK: vpmovzxdq -1032(%rdx), %xmm25
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0xf8,0xfb,0xff,0xff]
vpmovzxdq -1032(%rdx), %xmm25
// vpmovzxdq into %ymm28: register source (%xmm22) unmasked, with {%k7}, and
// with {%k7} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 2032 and -2048 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 16), while 2048 and
// -2064 are emitted as a full 4-byte displacement.
// CHECK: vpmovzxdq %xmm22, %ymm28
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x35,0xe6]
vpmovzxdq %xmm22, %ymm28
// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7}
// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x35,0xe6]
vpmovzxdq %xmm22, %ymm28 {%k7}
// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x35,0xe6]
vpmovzxdq %xmm22, %ymm28 {%k7} {z}
// CHECK: vpmovzxdq (%rcx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x21]
vpmovzxdq (%rcx), %ymm28
// CHECK: vpmovzxdq 291(%rax,%r14,8), %ymm28
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x35,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpmovzxdq 291(%rax,%r14,8), %ymm28
// CHECK: vpmovzxdq 2032(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x7f]
vpmovzxdq 2032(%rdx), %ymm28
// CHECK: vpmovzxdq 2048(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0x00,0x08,0x00,0x00]
vpmovzxdq 2048(%rdx), %ymm28
// CHECK: vpmovzxdq -2048(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x80]
vpmovzxdq -2048(%rdx), %ymm28
// CHECK: vpmovzxdq -2064(%rdx), %ymm28
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0xf0,0xf7,0xff,0xff]
vpmovzxdq -2064(%rdx), %ymm28
// vpmovzxwd into %xmm24: register source (%xmm17) unmasked, with {%k4}, and
// with {%k4} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 1016 and -1024 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 8), while 1024 and
// -1032 are emitted as a full 4-byte displacement.
// CHECK: vpmovzxwd %xmm17, %xmm24
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x33,0xc1]
vpmovzxwd %xmm17, %xmm24
// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4}
// CHECK: encoding: [0x62,0x22,0x7d,0x0c,0x33,0xc1]
vpmovzxwd %xmm17, %xmm24 {%k4}
// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0x8c,0x33,0xc1]
vpmovzxwd %xmm17, %xmm24 {%k4} {z}
// CHECK: vpmovzxwd (%rcx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x01]
vpmovzxwd (%rcx), %xmm24
// CHECK: vpmovzxwd 291(%rax,%r14,8), %xmm24
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x33,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovzxwd 291(%rax,%r14,8), %xmm24
// CHECK: vpmovzxwd 1016(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x7f]
vpmovzxwd 1016(%rdx), %xmm24
// CHECK: vpmovzxwd 1024(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0x00,0x04,0x00,0x00]
vpmovzxwd 1024(%rdx), %xmm24
// CHECK: vpmovzxwd -1024(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x80]
vpmovzxwd -1024(%rdx), %xmm24
// CHECK: vpmovzxwd -1032(%rdx), %xmm24
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0xf8,0xfb,0xff,0xff]
vpmovzxwd -1032(%rdx), %xmm24
// vpmovzxwd into %ymm26: register source (%xmm29) unmasked, with {%k5}, and
// with {%k5} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 2032 and -2048 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 16), while 2048 and
// -2064 are emitted as a full 4-byte displacement.
// CHECK: vpmovzxwd %xmm29, %ymm26
// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x33,0xd5]
vpmovzxwd %xmm29, %ymm26
// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5}
// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x33,0xd5]
vpmovzxwd %xmm29, %ymm26 {%k5}
// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x33,0xd5]
vpmovzxwd %xmm29, %ymm26 {%k5} {z}
// CHECK: vpmovzxwd (%rcx), %ymm26
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x11]
vpmovzxwd (%rcx), %ymm26
// CHECK: vpmovzxwd 291(%rax,%r14,8), %ymm26
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x33,0x94,0xf0,0x23,0x01,0x00,0x00]
vpmovzxwd 291(%rax,%r14,8), %ymm26
// CHECK: vpmovzxwd 2032(%rdx), %ymm26
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x7f]
vpmovzxwd 2032(%rdx), %ymm26
// CHECK: vpmovzxwd 2048(%rdx), %ymm26
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0x00,0x08,0x00,0x00]
vpmovzxwd 2048(%rdx), %ymm26
// CHECK: vpmovzxwd -2048(%rdx), %ymm26
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x80]
vpmovzxwd -2048(%rdx), %ymm26
// CHECK: vpmovzxwd -2064(%rdx), %ymm26
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0xf0,0xf7,0xff,0xff]
vpmovzxwd -2064(%rdx), %ymm26
// vpmovzxwq into %xmm29: register source (%xmm20) unmasked, with {%k2}, and
// with {%k2} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 508 and -512 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 4), while 512 and -516
// are emitted as a full 4-byte displacement.
// CHECK: vpmovzxwq %xmm20, %xmm29
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x34,0xec]
vpmovzxwq %xmm20, %xmm29
// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2}
// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x34,0xec]
vpmovzxwq %xmm20, %xmm29 {%k2}
// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x34,0xec]
vpmovzxwq %xmm20, %xmm29 {%k2} {z}
// CHECK: vpmovzxwq (%rcx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x29]
vpmovzxwq (%rcx), %xmm29
// CHECK: vpmovzxwq 291(%rax,%r14,8), %xmm29
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00]
vpmovzxwq 291(%rax,%r14,8), %xmm29
// CHECK: vpmovzxwq 508(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x7f]
vpmovzxwq 508(%rdx), %xmm29
// CHECK: vpmovzxwq 512(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0x00,0x02,0x00,0x00]
vpmovzxwq 512(%rdx), %xmm29
// CHECK: vpmovzxwq -512(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x80]
vpmovzxwq -512(%rdx), %xmm29
// CHECK: vpmovzxwq -516(%rdx), %xmm29
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff]
vpmovzxwq -516(%rdx), %xmm29
// vpmovzxwq into %ymm18: register source (%xmm25) unmasked, with {%k1}, and
// with {%k1} {z}; then memory sources (base only, base+index*8+disp, and four
// displacement boundary cases).
// In the expected bytes, 1016 and -1024 are emitted as a single displacement
// byte (0x7f / 0x80, i.e. the displacement divided by 8), while 1024 and
// -1032 are emitted as a full 4-byte displacement.
// CHECK: vpmovzxwq %xmm25, %ymm18
// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x34,0xd1]
vpmovzxwq %xmm25, %ymm18
// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x29,0x34,0xd1]
vpmovzxwq %xmm25, %ymm18 {%k1}
// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1} {z}
// CHECK: encoding: [0x62,0x82,0x7d,0xa9,0x34,0xd1]
vpmovzxwq %xmm25, %ymm18 {%k1} {z}
// CHECK: vpmovzxwq (%rcx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x11]
vpmovzxwq (%rcx), %ymm18
// CHECK: vpmovzxwq 291(%rax,%r14,8), %ymm18
// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x34,0x94,0xf0,0x23,0x01,0x00,0x00]
vpmovzxwq 291(%rax,%r14,8), %ymm18
// CHECK: vpmovzxwq 1016(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x7f]
vpmovzxwq 1016(%rdx), %ymm18
// CHECK: vpmovzxwq 1024(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0x00,0x04,0x00,0x00]
vpmovzxwq 1024(%rdx), %ymm18
// CHECK: vpmovzxwq -1024(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x80]
vpmovzxwq -1024(%rdx), %ymm18
// CHECK: vpmovzxwq -1032(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0xf8,0xfb,0xff,0xff]
vpmovzxwq -1032(%rdx), %ymm18
// CHECK: vpmulld %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x02,0x65,0x00,0x40,0xc8]
vpmulld %xmm24, %xmm19, %xmm25