diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f9b3f1a1e7b0..81008e92868d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8856,6 +8856,37 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
 }
 
+static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
+                                       SelectionDAG &DAG) {
+  MVT VT = Op->getValueType(0).getSimpleVT();
+  SDValue In = Op->getOperand(0);
+  MVT InVT = In.getValueType().getSimpleVT();
+  SDLoc DL(Op);
+  unsigned int NumElts = VT.getVectorNumElements();
+  if (NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+  EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Now we have only mask extension
+  assert(InVT.getVectorElementType() == MVT::i1);
+  SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
+  const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+  SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+                           MachinePointerInfo::getConstantPool(),
+                           false, false, false, Alignment);
+
+  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+  if (VT.is512BitVector())
+    return Brcst;
+  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+}
+
 static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
                                SelectionDAG &DAG) {
   if (Subtarget->hasFp256()) {
@@ -8874,6 +8905,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
   SDValue In = Op.getOperand(0);
   MVT SVT = In.getSimpleValueType();
 
+  if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
+    return LowerZERO_EXTEND_AVX512(Op, DAG);
+
   if (Subtarget->hasFp256()) {
     SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
     if (Res.getNode())
@@ -8902,11 +8936,37 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
-  MVT VT = Op.getSimpleValueType();
+  MVT VT = Op.getSimpleValueType();
   SDValue In = Op.getOperand(0);
-  MVT SVT = In.getSimpleValueType();
+  MVT InVT = In.getSimpleValueType();
+  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
+         "Invalid TRUNCATE operation");
 
-  if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+  if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
+    if (VT.getVectorElementType().getSizeInBits() >=8)
+      return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
+    assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+    unsigned NumElts = InVT.getVectorNumElements();
+    assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
+    if (InVT.getSizeInBits() < 512) {
+      MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
+      In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+      InVT = ExtVT;
+    }
+    SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
+    const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+    SDValue CP = DAG.getConstantPool(C, getPointerTy());
+    unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+    SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, false, Alignment);
+    SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+    SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
+    return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
+  }
+
+  if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
     // On AVX2, v4i64 -> v4i32 becomes VPERMD.
     if (Subtarget->hasInt256()) {
       static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
@@ -8937,7 +8997,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
   }
 
-  if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+  if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
     // On AVX2, v8i32 -> v8i16 becomed PSHUFB.
     if (Subtarget->hasInt256()) {
       In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
@@ -8995,11 +9055,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Handle truncation of V256 to V128 using shuffles.
-  if (!VT.is128BitVector() || !SVT.is256BitVector())
+  if (!VT.is128BitVector() || !InVT.is256BitVector())
     return SDValue();
 
-  assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
-         "Invalid op");
   assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
 
   unsigned NumElems = VT.getVectorNumElements();
@@ -10282,28 +10340,29 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
   MVT InVT = In.getSimpleValueType();
   SDLoc dl(Op);
 
-  if (InVT.getVectorElementType().getSizeInBits() >=8 &&
-      VT.getVectorElementType().getSizeInBits() >= 32)
+  unsigned int NumElts = VT.getVectorNumElements();
+  if (NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
 
-  if (InVT.getVectorElementType() == MVT::i1) {
-    unsigned int NumElts = InVT.getVectorNumElements();
-    assert ((NumElts == 8 || NumElts == 16) &&
-            "Unsupported SIGN_EXTEND operation");
-    if (VT.getVectorElementType().getSizeInBits() >= 32) {
-      Constant *C =
-       ConstantInt::get(*DAG.getContext(),
-                        (NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
-      unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
-      SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
-                               MachinePointerInfo::getConstantPool(),
-                               false, false, false, Alignment);
-      return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
-    }
-  }
-  return SDValue();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+  MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+  Constant *C = ConstantInt::get(*DAG.getContext(),
+    APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+
+  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+  SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
+                           MachinePointerInfo::getConstantPool(),
+                           false, false, false, Alignment);
+  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
+  if (VT.is512BitVector())
+    return Brcst;
+  return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
 }
 
 static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
@@ -11142,10 +11201,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
   case Intrinsic::x86_sse2_max_pd:
   case Intrinsic::x86_avx_max_ps_256:
   case Intrinsic::x86_avx_max_pd_256:
+  case Intrinsic::x86_avx512_max_ps_512:
+  case Intrinsic::x86_avx512_max_pd_512:
   case Intrinsic::x86_sse_min_ps:
   case Intrinsic::x86_sse2_min_pd:
   case Intrinsic::x86_avx_min_ps_256:
-  case Intrinsic::x86_avx_min_pd_256: {
+  case Intrinsic::x86_avx_min_pd_256:
+  case Intrinsic::x86_avx512_min_ps_512:
+  case Intrinsic::x86_avx512_min_pd_512: {
     unsigned Opcode;
     switch (IntNo) {
     default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -11153,12 +11216,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
     case Intrinsic::x86_sse2_max_pd:
     case Intrinsic::x86_avx_max_ps_256:
     case Intrinsic::x86_avx_max_pd_256:
+    case Intrinsic::x86_avx512_max_ps_512:
+    case Intrinsic::x86_avx512_max_pd_512:
       Opcode = X86ISD::FMAX;
       break;
     case Intrinsic::x86_sse_min_ps:
     case Intrinsic::x86_sse2_min_pd:
     case Intrinsic::x86_avx_min_ps_256:
    case Intrinsic::x86_avx_min_pd_256:
+    case Intrinsic::x86_avx512_min_ps_512:
+    case Intrinsic::x86_avx512_min_pd_512:
      Opcode = X86ISD::FMIN;
       break;
     }
@@ -13375,6 +13442,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
   case X86ISD::VZEXT:              return "X86ISD::VZEXT";
   case X86ISD::VSEXT:              return "X86ISD::VSEXT";
+  case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
+  case X86ISD::VTRUNCM:            return "X86ISD::VTRUNCM";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
@@ -16274,6 +16343,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
   }
 
+  if (Subtarget->hasAVX512() && VT.isVector() &&
+      Cond.getValueType().getVectorElementType() == MVT::i1) {
+    // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+    // lowering on AVX-512. In this case we convert it to
+    // v16i8 (select v16i8, v16i8, v16i8) and use the AVX instruction.
+    // The same situation holds for all 128- and 256-bit vectors of i8 and i16.
+    EVT OpVT = LHS.getValueType();
+    if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
+        (OpVT.getVectorElementType() == MVT::i8 ||
+         OpVT.getVectorElementType() == MVT::i16)) {
+      Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
+      DCI.AddToWorklist(Cond.getNode());
+      return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
+    }
+    else
+      return SDValue();
+  }
   // If this is a select between two integer constants, try to do some
   // optimizations.
   if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 40b2a9ce7670..632a5b63baca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -254,6 +254,12 @@ namespace llvm {
       // VSEXT - Vector integer signed-extend.
       VSEXT,
 
+      // VTRUNC - Vector integer truncate.
+      VTRUNC,
+
+      // VTRUNCM - Vector integer truncate with mask.
+      VTRUNCM,
+
       // VFPEXT - Vector FP extend.
       VFPEXT,
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 95b0de41fa8e..6b2f1608ca40 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2805,6 +2805,123 @@ def : Pat<(v8f64 (frint VR512:$src)),
 def : Pat<(v8f64 (ftrunc VR512:$src)),
           (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
 
+//-------------------------------------------------
+// Integer truncate and extend operations
+//-------------------------------------------------
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
+                            RegisterClass dstRC, RegisterClass srcRC,
+                            RegisterClass KRC, X86MemOperand x86memop> {
+  def rr : AVX512XS8I, EVEX;
+
+  def krr : AVX512XS8I, EVEX, EVEX_KZ;
+
+  def mr : AVX512XS8I, EVEX;
+}
+
+defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
+                                  i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
+                                  i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
+                                  i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
+                                  i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
+                                  i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
+                                  i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
+                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+
+def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr VR512:$src)>;
+def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr VR512:$src)>;
+def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
+def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
+def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr VR512:$src)>;
+
+def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+          (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+          (VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v8i16  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
+          (VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
+def : Pat<(v8i32  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
+          (VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
+
+
+multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
+                         RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
+                         X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {
+
+  def rr : AVX5128I, EVEX;
+  def rm : AVX5128I,
+           EVEX;
+}
+
+defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
+                               memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+                               EVEX_CD8<8, CD8VQ>;
+defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
+                               memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+                               EVEX_CD8<8, CD8VO>;
+defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
+                               memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+                               EVEX_CD8<16, CD8VH>;
+defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
+                               memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+                               EVEX_CD8<16, CD8VQ>;
+defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
+                               memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+                               EVEX_CD8<32, CD8VH>;
+
+defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
+                               memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+                               EVEX_CD8<8, CD8VQ>;
+defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
+                               memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+                               EVEX_CD8<8, CD8VO>;
+defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
+                               memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+                               EVEX_CD8<16, CD8VH>;
+defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
+                               memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+                               EVEX_CD8<16, CD8VQ>;
+defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
+                               memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+                               EVEX_CD8<32, CD8VH>;
+
 //===----------------------------------------------------------------------===//
 // VSHUFPS - VSHUFPD Operations
 
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index ab7c602a59e2..f5044809541b 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -105,6 +105,13 @@ def X86vsext   : SDNode<"X86ISD::VSEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisInt<0>, SDTCisInt<1>]>>;
 
+def X86vtrunc  : SDNode<"X86ISD::VTRUNC",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
+                        SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisInt<0>, SDTCisInt<1>,
+                                             SDTCisVec<2>, SDTCisInt<2>]>>;
 def X86vfpext  : SDNode<"X86ISD::VFPEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisFP<0>, SDTCisFP<1>]>>;
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll b/llvm/test/CodeGen/X86/avx512-trunc-ext.ll
new file mode 100644
index 000000000000..a4f9a0394f64
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-trunc-ext.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: trunc_16x32_to_16x8
+; CHECK: vpmovdb
+; CHECK: ret
+define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
+  %x = trunc <16 x i32> %i to <16 x i8>
+  ret <16 x i8> %x
+}
+
+; CHECK-LABEL: trunc_8x64_to_8x16
+; CHECK: vpmovqw
+; CHECK: ret
+define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
+  %x = trunc <8 x i64> %i to <8 x i16>
+  ret <8 x i16> %x
+}
+
+
+; CHECK-LABEL: zext_16x8_to_16x32
+; CHECK: vpmovzxbd {{.*}}%zmm
+; CHECK: ret
+define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
+  %x = zext <16 x i8> %i to <16 x i32>
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: sext_16x8_to_16x32
+; CHECK: vpmovsxbd {{.*}}%zmm
+; CHECK: ret
+define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
+  %x = sext <16 x i8> %i to <16 x i32>
+  ret <16 x i32> %x
+}
+
+
+; CHECK-LABEL: zext_16x16_to_16x32
+; CHECK: vpmovzxwd {{.*}}%zmm
+; CHECK: ret
+define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
+  %x = zext <16 x i16> %i to <16 x i32>
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: zext_8x16_to_8x64
+; CHECK: vpmovzxwq
+; CHECK: ret
+define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
+  %x = zext <8 x i16> %i to <8 x i64>
+  ret <8 x i64> %x
+}
+
+;CHECK-LABEL: fptrunc_test
+;CHECK: vcvtpd2ps {{.*}}%zmm
+;CHECK: ret
+define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
+  %b = fptrunc <8 x double> %a to <8 x float>
+  ret <8 x float> %b
+}
+
+;CHECK-LABEL: fpext_test
+;CHECK: vcvtps2pd {{.*}}%zmm
+;CHECK: ret
+define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
+  %b = fpext <8 x float> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+; CHECK-LABEL: zext_16i1_to_16xi32
+; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0{%k1}{z}
+; CHECK: ret
+define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
+  %a = bitcast i16 %b to <16 x i1>
+  %c = zext <16 x i1> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+; CHECK-LABEL: zext_8i1_to_8xi64
+; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0{%k1}{z}
+; CHECK: ret
+define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
+  %a = bitcast i8 %b to <8 x i1>
+  %c = zext <8 x i1> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+; CHECK-LABEL: trunc_16i8_to_16i1
+; CHECK: vpmovsxbd
+; CHECK: vpandd
+; CHECK: vptestmd
+; CHECK: ret
+define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
+  %mask_b = trunc <16 x i8>%a to <16 x i1>
+  %mask = bitcast <16 x i1> %mask_b to i16
+  ret i16 %mask
+}
+
+; CHECK-LABEL: trunc_16i32_to_16i1
+; CHECK: vpandd
+; CHECK: vptestmd
+; CHECK: ret
+define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
+  %mask_b = trunc <16 x i32>%a to <16 x i1>
+  %mask = bitcast <16 x i1> %mask_b to i16
+  ret i16 %mask
+}
+
+; CHECK-LABEL: trunc_8i16_to_8i1
+; CHECK: vpmovsxwq
+; CHECK: vpandq LCP{{.*}}(%rip){1to8}
+; CHECK: vptestmq
+; CHECK: ret
+define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
+  %mask_b = trunc <8 x i16>%a to <8 x i1>
+  %mask = bitcast <8 x i1> %mask_b to i8
+  ret i8 %mask
+}
+
+; CHECK-LABEL: sext_8i1_8i32
+; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0{%k1}{z}
+; CHECK: ret
+define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+  %x = icmp slt <8 x i32> %a1, %a2
+  %x1 = xor <8 x i1> %x, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+  %y = sext <8 x i1> %x1 to <8 x i32>
+  ret <8 x i32> %y
+}
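
For reference, a minimal IR sketch (not part of this patch) of the select shape targeted by the PerformSELECTCombine change above: with AVX-512 enabled the compare yields a <16 x i1> condition, and the combine sign-extends it to <16 x i8> so the existing AVX blend lowering can be reused. The function name is illustrative and no particular instruction sequence is asserted.

define <16 x i8> @select_v16i8_sketch(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
  ; icmp on v16i8 produces a v16i1 mask when v16i1 is a legal type (AVX-512)
  %cond = icmp slt <16 x i8> %a, %b
  ; no direct masked form for v16i8 select; the combine widens %cond to v16i8
  %sel = select <16 x i1> %cond, <16 x i8> %a, <16 x i8> %c
  ret <16 x i8> %sel
}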