AVX-512: added extend and truncate instructions.

llvm-svn: 189580
This commit is contained in:
Elena Demikhovsky 2013-08-29 11:56:53 +00:00
parent ab7ff52efd
commit 980c6b08b1
5 changed files with 371 additions and 28 deletions

View File

@ -8856,6 +8856,37 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
SelectionDAG &DAG) {
MVT VT = Op->getValueType(0).getSimpleVT();
SDValue In = Op->getOperand(0);
MVT InVT = In.getValueType().getSimpleVT();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Now we have only mask extension
assert(InVT.getVectorElementType() == MVT::i1);
SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
if (VT.is512BitVector())
return Brcst;
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
@ -8874,6 +8905,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
@ -8902,11 +8936,37 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
MVT InVT = In.getSimpleValueType();
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
if (VT.getVectorElementType().getSizeInBits() >=8)
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
unsigned NumElts = InVT.getVectorNumElements();
assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
if (InVT.getSizeInBits() < 512) {
MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
}
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
}
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
@ -8937,7 +8997,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
}
if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomed PSHUFB.
if (Subtarget->hasInt256()) {
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
@ -8995,11 +9055,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
// Handle truncation of V256 to V128 using shuffles.
if (!VT.is128BitVector() || !SVT.is256BitVector())
if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
"Invalid op");
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
@ -10282,28 +10340,29 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (InVT.getVectorElementType().getSizeInBits() >=8 &&
VT.getVectorElementType().getSizeInBits() >= 32)
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
if (InVT.getVectorElementType() == MVT::i1) {
unsigned int NumElts = InVT.getVectorNumElements();
assert ((NumElts == 8 || NumElts == 16) &&
"Unsupported SIGN_EXTEND operation");
if (VT.getVectorElementType().getSizeInBits() >= 32) {
Constant *C =
ConstantInt::get(*DAG.getContext(),
(NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
}
}
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
Constant *C = ConstantInt::get(*DAG.getContext(),
APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
if (VT.is512BitVector())
return Brcst;
return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
}
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
@ -11142,10 +11201,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
case Intrinsic::x86_avx512_max_ps_512:
case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256: {
case Intrinsic::x86_avx_min_pd_256:
case Intrinsic::x86_avx512_min_ps_512:
case Intrinsic::x86_avx512_min_pd_512: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@ -11153,12 +11216,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
case Intrinsic::x86_avx512_max_ps_512:
case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
case Intrinsic::x86_avx512_min_ps_512:
case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
@ -13375,6 +13442,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@ -16274,6 +16343,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
if (Subtarget->hasAVX512() && VT.isVector() &&
Cond.getValueType().getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on AVX-512. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
// The same situation for all 128 and 256-bit vectors of i8 and i16
EVT OpVT = LHS.getValueType();
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
}
else
return SDValue();
}
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {

View File

@ -254,6 +254,12 @@ namespace llvm {
// VSEXT - Vector integer signed-extend.
VSEXT,
// VTRUNC - Vector integer truncate.
VTRUNC,
// VTRUNC - Vector integer truncate with mask.
VTRUNCM,
// VFPEXT - Vector FP extend.
VFPEXT,

View File

@ -2805,6 +2805,123 @@ def : Pat<(v8f64 (frint VR512:$src)),
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
RegisterClass dstRC, RegisterClass srcRC,
RegisterClass KRC, X86MemOperand x86memop> {
def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
(ins srcRC:$src),
!strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
(ins KRC:$mask, srcRC:$src),
!strconcat(OpcodeStr,
"\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
[]>, EVEX, EVEX_KZ;
def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX;
}
defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
(VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
(VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
(VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
(VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {
def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins SrcRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins x86memop:$src),
!strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
EVEX;
}
defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations

View File

@ -105,6 +105,13 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>]>>;
def X86vtrunc : SDNode<"X86ISD::VTRUNC",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>]>>;
def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisVec<2>, SDTCisInt<2>]>>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;

View File

@ -0,0 +1,127 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; CHECK-LABEL: trunc_16x32_to_16x8
; CHECK: vpmovdb
; CHECK: ret
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
%x = trunc <16 x i32> %i to <16 x i8>
ret <16 x i8> %x
}
; CHECK-LABEL: trunc_8x64_to_8x16
; CHECK: vpmovqw
; CHECK: ret
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
%x = trunc <8 x i64> %i to <8 x i16>
ret <8 x i16> %x
}
; CHECK-LABEL: zext_16x8_to_16x32
; CHECK; vpmovzxbd {{.*}}%zmm
; CHECK: ret
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
%x = zext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
; CHECK-LABEL: sext_16x8_to_16x32
; CHECK; vpmovsxbd {{.*}}%zmm
; CHECK: ret
define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
%x = sext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
; CHECK-LABEL: zext_16x16_to_16x32
; CHECK; vpmovzxwd {{.*}}%zmm
; CHECK: ret
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
%x = zext <16 x i16> %i to <16 x i32>
ret <16 x i32> %x
}
; CHECK-LABEL: zext_8x16_to_8x64
; CHECK; vpmovzxwq
; CHECK: ret
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
%x = zext <8 x i16> %i to <8 x i64>
ret <8 x i64> %x
}
;CHECK-LABEL: fptrunc_test
;CHECK: vcvtpd2ps {{.*}}%zmm
;CHECK: ret
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
%b = fptrunc <8 x double> %a to <8 x float>
ret <8 x float> %b
}
;CHECK-LABEL: fpext_test
;CHECK: vcvtps2pd {{.*}}%zmm
;CHECK: ret
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
%b = fpext <8 x float> %a to <8 x double>
ret <8 x double> %b
}
; CHECK-LABEL: zext_16i1_to_16xi32
; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0{%k1}{z}
; CHECK: ret
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
ret <16 x i32> %c
}
; CHECK-LABEL: zext_8i1_to_8xi64
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0{%k1}{z}
; CHECK: ret
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
ret <8 x i64> %c
}
; CHECK-LABEL: trunc_16i8_to_16i1
; CHECK: vpmovsxbd
; CHECK: vpandd
; CHECK: vptestmd
; CHECK: ret
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
}
; CHECK-LABEL: trunc_16i32_to_16i1
; CHECK: vpandd
; CHECK: vptestmd
; CHECK: ret
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
%mask_b = trunc <16 x i32>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
}
; CHECK-LABEL: trunc_8i16_to_8i1
; CHECK: vpmovsxwq
; CHECK: vpandq LCP{{.*}}(%rip){1to8}
; CHECK: vptestmq
; CHECK: ret
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
%mask_b = trunc <8 x i16>%a to <8 x i1>
%mask = bitcast <8 x i1> %mask_b to i8
ret i8 %mask
}
; CHECK: sext_8i1_8i32
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0{%k1}{z}
; CHECK: ret
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
%y = sext <8 x i1> %x1 to <8 x i32>
ret <8 x i32> %y
}