forked from OSchip/llvm-project
AVX-512: added extend and truncate instructions.
llvm-svn: 189580
This commit is contained in:
parent
ab7ff52efd
commit
980c6b08b1
|
@ -8856,6 +8856,37 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
|
|||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
|
||||
}
|
||||
|
||||
/// Lower a ZERO_EXTEND whose result or source uses AVX-512 vector types.
///
/// Two cases are handled:
///   1. zext to a 512-bit vector from a non-mask (non-i1) source: emit a
///      single X86ISD::VZEXT node.
///   2. zext from an i1 mask vector: broadcast the scalar constant 1 into
///      the lanes selected by the mask (X86ISD::VBROADCASTM) so unselected
///      lanes become zero, then VTRUNC down if the result is narrower than
///      512 bits.
///
/// Returns an empty SDValue when this routine does not apply (element count
/// other than 8 or 16).
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
                                       SelectionDAG &DAG) {
  MVT VT = Op->getValueType(0).getSimpleVT();
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getValueType().getSimpleVT();
  SDLoc DL(Op);
  unsigned NumElts = VT.getVectorNumElements();

  // AVX-512 mask registers model only v8i1 and v16i1 here.
  if (NumElts != 8 && NumElts != 16)
    return SDValue();

  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);

  // Now we have only mask extension.
  assert(InVT.getVectorElementType() == MVT::i1 &&
         "Expected an i1 mask vector source");

  // Materialize the scalar constant 1 in the constant pool; VBROADCASTM
  // splats the loaded value under the mask, zeroing unselected lanes.
  EVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
  // getTargetConstant always produces a ConstantSDNode, so use cast<>
  // (the original dyn_cast<> was dereferenced unchecked anyway).
  const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
  SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, Alignment);

  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
  if (VT.is512BitVector())
    return Brcst;
  // Result is narrower than 512 bits: truncate the broadcast back down.
  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
}
|
||||
|
||||
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
if (Subtarget->hasFp256()) {
|
||||
|
@ -8874,6 +8905,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
|||
SDValue In = Op.getOperand(0);
|
||||
MVT SVT = In.getSimpleValueType();
|
||||
|
||||
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
|
||||
return LowerZERO_EXTEND_AVX512(Op, DAG);
|
||||
|
||||
if (Subtarget->hasFp256()) {
|
||||
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
|
||||
if (Res.getNode())
|
||||
|
@ -8902,11 +8936,37 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
|||
|
||||
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDValue In = Op.getOperand(0);
|
||||
MVT SVT = In.getSimpleValueType();
|
||||
MVT InVT = In.getSimpleValueType();
|
||||
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
|
||||
"Invalid TRUNCATE operation");
|
||||
|
||||
if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
|
||||
if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
|
||||
if (VT.getVectorElementType().getSizeInBits() >=8)
|
||||
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
|
||||
|
||||
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
|
||||
unsigned NumElts = InVT.getVectorNumElements();
|
||||
assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
|
||||
if (InVT.getSizeInBits() < 512) {
|
||||
MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
|
||||
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
|
||||
InVT = ExtVT;
|
||||
}
|
||||
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
|
||||
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
|
||||
SDValue CP = DAG.getConstantPool(C, getPointerTy());
|
||||
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
|
||||
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
|
||||
MachinePointerInfo::getConstantPool(),
|
||||
false, false, false, Alignment);
|
||||
SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
|
||||
SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
|
||||
return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
|
||||
}
|
||||
|
||||
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
|
||||
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
|
||||
if (Subtarget->hasInt256()) {
|
||||
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
|
||||
|
@ -8937,7 +8997,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
|||
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
|
||||
}
|
||||
|
||||
if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
|
||||
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
|
||||
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
|
||||
if (Subtarget->hasInt256()) {
|
||||
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
|
||||
|
@ -8995,11 +9055,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
|
||||
// Handle truncation of V256 to V128 using shuffles.
|
||||
if (!VT.is128BitVector() || !SVT.is256BitVector())
|
||||
if (!VT.is128BitVector() || !InVT.is256BitVector())
|
||||
return SDValue();
|
||||
|
||||
assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
|
||||
"Invalid op");
|
||||
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
|
||||
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
@ -10282,28 +10340,29 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
|
|||
MVT InVT = In.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (InVT.getVectorElementType().getSizeInBits() >=8 &&
|
||||
VT.getVectorElementType().getSizeInBits() >= 32)
|
||||
unsigned int NumElts = VT.getVectorNumElements();
|
||||
if (NumElts != 8 && NumElts != 16)
|
||||
return SDValue();
|
||||
|
||||
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
|
||||
if (InVT.getVectorElementType() == MVT::i1) {
|
||||
unsigned int NumElts = InVT.getVectorNumElements();
|
||||
assert ((NumElts == 8 || NumElts == 16) &&
|
||||
"Unsupported SIGN_EXTEND operation");
|
||||
if (VT.getVectorElementType().getSizeInBits() >= 32) {
|
||||
Constant *C =
|
||||
ConstantInt::get(*DAG.getContext(),
|
||||
(NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
|
||||
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
|
||||
SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
|
||||
MachinePointerInfo::getConstantPool(),
|
||||
false, false, false, Alignment);
|
||||
return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
|
||||
|
||||
MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
|
||||
Constant *C = ConstantInt::get(*DAG.getContext(),
|
||||
APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
|
||||
|
||||
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
|
||||
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
|
||||
SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
|
||||
MachinePointerInfo::getConstantPool(),
|
||||
false, false, false, Alignment);
|
||||
SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
|
||||
if (VT.is512BitVector())
|
||||
return Brcst;
|
||||
return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
|
||||
}
|
||||
|
||||
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
||||
|
@ -11142,10 +11201,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||
case Intrinsic::x86_sse2_max_pd:
|
||||
case Intrinsic::x86_avx_max_ps_256:
|
||||
case Intrinsic::x86_avx_max_pd_256:
|
||||
case Intrinsic::x86_avx512_max_ps_512:
|
||||
case Intrinsic::x86_avx512_max_pd_512:
|
||||
case Intrinsic::x86_sse_min_ps:
|
||||
case Intrinsic::x86_sse2_min_pd:
|
||||
case Intrinsic::x86_avx_min_ps_256:
|
||||
case Intrinsic::x86_avx_min_pd_256: {
|
||||
case Intrinsic::x86_avx_min_pd_256:
|
||||
case Intrinsic::x86_avx512_min_ps_512:
|
||||
case Intrinsic::x86_avx512_min_pd_512: {
|
||||
unsigned Opcode;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
|
@ -11153,12 +11216,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||
case Intrinsic::x86_sse2_max_pd:
|
||||
case Intrinsic::x86_avx_max_ps_256:
|
||||
case Intrinsic::x86_avx_max_pd_256:
|
||||
case Intrinsic::x86_avx512_max_ps_512:
|
||||
case Intrinsic::x86_avx512_max_pd_512:
|
||||
Opcode = X86ISD::FMAX;
|
||||
break;
|
||||
case Intrinsic::x86_sse_min_ps:
|
||||
case Intrinsic::x86_sse2_min_pd:
|
||||
case Intrinsic::x86_avx_min_ps_256:
|
||||
case Intrinsic::x86_avx_min_pd_256:
|
||||
case Intrinsic::x86_avx512_min_ps_512:
|
||||
case Intrinsic::x86_avx512_min_pd_512:
|
||||
Opcode = X86ISD::FMIN;
|
||||
break;
|
||||
}
|
||||
|
@ -13375,6 +13442,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
|
||||
case X86ISD::VZEXT: return "X86ISD::VZEXT";
|
||||
case X86ISD::VSEXT: return "X86ISD::VSEXT";
|
||||
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
|
||||
case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
|
||||
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
|
||||
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
|
||||
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
|
||||
|
@ -16274,6 +16343,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
|
||||
}
|
||||
|
||||
if (Subtarget->hasAVX512() && VT.isVector() &&
|
||||
Cond.getValueType().getVectorElementType() == MVT::i1) {
|
||||
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
|
||||
// lowering on AVX-512. In this case we convert it to
|
||||
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
|
||||
// The same situation for all 128 and 256-bit vectors of i8 and i16
|
||||
EVT OpVT = LHS.getValueType();
|
||||
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
|
||||
(OpVT.getVectorElementType() == MVT::i8 ||
|
||||
OpVT.getVectorElementType() == MVT::i16)) {
|
||||
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
|
||||
DCI.AddToWorklist(Cond.getNode());
|
||||
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
|
||||
}
|
||||
else
|
||||
return SDValue();
|
||||
}
|
||||
// If this is a select between two integer constants, try to do some
|
||||
// optimizations.
|
||||
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
|
||||
|
|
|
@ -254,6 +254,12 @@ namespace llvm {
|
|||
// VSEXT - Vector integer signed-extend.
|
||||
VSEXT,
|
||||
|
||||
// VTRUNC - Vector integer truncate.
|
||||
VTRUNC,
|
||||
|
||||
// VTRUNC - Vector integer truncate with mask.
|
||||
VTRUNCM,
|
||||
|
||||
// VFPEXT - Vector FP extend.
|
||||
VFPEXT,
|
||||
|
||||
|
|
|
@ -2805,6 +2805,123 @@ def : Pat<(v8f64 (frint VR512:$src)),
|
|||
def : Pat<(v8f64 (ftrunc VR512:$src)),
|
||||
(VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
|
||||
|
||||
//-------------------------------------------------
|
||||
// Integer truncate and extend operations
|
||||
//-------------------------------------------------
|
||||
|
||||
// Multiclass for the VPMOV* down-converting (truncating) moves.
// Emits three forms per instruction:
//   rr  - register -> register
//   krr - register -> register, zero-masked ({z} writemask)
//   mr  - register -> memory
// No selection patterns are attached here; matching is done by the
// explicit Pat<> definitions that follow.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                            RegisterClass dstRC, RegisterClass srcRC,
                            RegisterClass KRC, X86MemOperand x86memop> {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
                      (ins srcRC:$src),
                      !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
                      []>, EVEX;
  def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
                       (ins KRC:$mask, srcRC:$src),
                       !strconcat(OpcodeStr,
                                  "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
                       []>, EVEX, EVEX_KZ;
  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      []>, EVEX;
}
|
||||
// 512-bit truncating-move instantiations.  Each source/dest width pairing
// comes in three flavors: plain (VPMOV), signed-saturating (VPMOVS) and
// unsigned-saturating (VPMOVUS).  The EVEX_CD8 tuple encodes the memory
// displacement scaling for the destination element size.
defm VPMOVQB    : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB   : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB  : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVQW    : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW   : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW  : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVQD    : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD   : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD  : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVDW    : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW   : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW  : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVDB    : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB   : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB  : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
|
||||
|
||||
// Select plain vector truncates (X86vtrunc) to the corresponding
// unmasked VPMOV* rr forms.
def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr VR512:$src)>;

// Masked truncates (X86vtruncm) select the zero-masking krr forms.
def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16  (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32  (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
|
||||
|
||||
|
||||
// Multiclass for the 512-bit VPMOVZX*/VPMOVSX* integer extends.
// Emits a register form and a memory form; the memory form loads via
// mem_frag and bitconverts the result to the expected input vector type
// before extending with OpNode (X86vzext or X86vsext).
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
              RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
              X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;
}
|
||||
|
||||
// Zero-extend instantiations (X86vzext).
defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
                               memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
                               EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
                               memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
                               EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
                               memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
                               EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
                               memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
                               EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
                               memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
                               EVEX_CD8<32, CD8VH>;

// Sign-extend instantiations (X86vsext); same shapes, different opcodes.
defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
                               memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
                               EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
                               memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
                               EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
                               memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
                               EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
                               memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
                               EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
                               memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
                               EVEX_CD8<32, CD8VH>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VSHUFPS - VSHUFPD Operations
|
||||
|
||||
|
|
|
@ -105,6 +105,13 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
|
|||
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisInt<0>, SDTCisInt<1>]>>;
|
||||
|
||||
// VTRUNC - plain vector integer truncate: one integer-vector operand,
// integer-vector result.
def X86vtrunc  : SDNode<"X86ISD::VTRUNC",
                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                             SDTCisInt<0>, SDTCisInt<1>]>>;
// VTRUNCM - vector integer truncate with a mask: operand 1 is the mask
// vector, operand 2 is the value being truncated.
def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
                        SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                             SDTCisInt<0>, SDTCisInt<1>,
                                             SDTCisVec<2>, SDTCisInt<2>]>>;
|
||||
def X86vfpext : SDNode<"X86ISD::VFPEXT",
|
||||
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisFP<0>, SDTCisFP<1>]>>;
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: trunc_16x32_to_16x8
|
||||
; CHECK: vpmovdb
|
||||
; CHECK: ret
|
||||
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
|
||||
%x = trunc <16 x i32> %i to <16 x i8>
|
||||
ret <16 x i8> %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_8x64_to_8x16
|
||||
; CHECK: vpmovqw
|
||||
; CHECK: ret
|
||||
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
|
||||
%x = trunc <8 x i64> %i to <8 x i16>
|
||||
ret <8 x i16> %x
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: zext_16x8_to_16x32
|
||||
; CHECK: vpmovzxbd {{.*}}%zmm
|
||||
; CHECK: ret
|
||||
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
|
||||
%x = zext <16 x i8> %i to <16 x i32>
|
||||
ret <16 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: sext_16x8_to_16x32
|
||||
; CHECK: vpmovsxbd {{.*}}%zmm
|
||||
; CHECK: ret
|
||||
define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
|
||||
%x = sext <16 x i8> %i to <16 x i32>
|
||||
ret <16 x i32> %x
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: zext_16x16_to_16x32
|
||||
; CHECK: vpmovzxwd {{.*}}%zmm
|
||||
; CHECK: ret
|
||||
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
|
||||
%x = zext <16 x i16> %i to <16 x i32>
|
||||
ret <16 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: zext_8x16_to_8x64
|
||||
; CHECK: vpmovzxwq
|
||||
; CHECK: ret
|
||||
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
|
||||
%x = zext <8 x i16> %i to <8 x i64>
|
||||
ret <8 x i64> %x
|
||||
}
|
||||
|
||||
;CHECK-LABEL: fptrunc_test
|
||||
;CHECK: vcvtpd2ps {{.*}}%zmm
|
||||
;CHECK: ret
|
||||
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
|
||||
%b = fptrunc <8 x double> %a to <8 x float>
|
||||
ret <8 x float> %b
|
||||
}
|
||||
|
||||
;CHECK-LABEL: fpext_test
|
||||
;CHECK: vcvtps2pd {{.*}}%zmm
|
||||
;CHECK: ret
|
||||
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
|
||||
%b = fpext <8 x float> %a to <8 x double>
|
||||
ret <8 x double> %b
|
||||
}
|
||||
|
||||
; CHECK-LABEL: zext_16i1_to_16xi32
|
||||
; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0{%k1}{z}
|
||||
; CHECK: ret
|
||||
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
|
||||
%a = bitcast i16 %b to <16 x i1>
|
||||
%c = zext <16 x i1> %a to <16 x i32>
|
||||
ret <16 x i32> %c
|
||||
}
|
||||
|
||||
; CHECK-LABEL: zext_8i1_to_8xi64
|
||||
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0{%k1}{z}
|
||||
; CHECK: ret
|
||||
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
|
||||
%a = bitcast i8 %b to <8 x i1>
|
||||
%c = zext <8 x i1> %a to <8 x i64>
|
||||
ret <8 x i64> %c
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_16i8_to_16i1
|
||||
; CHECK: vpmovsxbd
|
||||
; CHECK: vpandd
|
||||
; CHECK: vptestmd
|
||||
; CHECK: ret
|
||||
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
|
||||
%mask_b = trunc <16 x i8>%a to <16 x i1>
|
||||
%mask = bitcast <16 x i1> %mask_b to i16
|
||||
ret i16 %mask
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_16i32_to_16i1
|
||||
; CHECK: vpandd
|
||||
; CHECK: vptestmd
|
||||
; CHECK: ret
|
||||
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
|
||||
%mask_b = trunc <16 x i32>%a to <16 x i1>
|
||||
%mask = bitcast <16 x i1> %mask_b to i16
|
||||
ret i16 %mask
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_8i16_to_8i1
|
||||
; CHECK: vpmovsxwq
|
||||
; CHECK: vpandq LCP{{.*}}(%rip){1to8}
|
||||
; CHECK: vptestmq
|
||||
; CHECK: ret
|
||||
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
|
||||
%mask_b = trunc <8 x i16>%a to <8 x i1>
|
||||
%mask = bitcast <8 x i1> %mask_b to i8
|
||||
ret i8 %mask
|
||||
}
|
||||
|
||||
; CHECK-LABEL: sext_8i1_8i32
|
||||
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0{%k1}{z}
|
||||
; CHECK: ret
|
||||
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
|
||||
%x = icmp slt <8 x i32> %a1, %a2
|
||||
%x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
|
||||
%y = sext <8 x i1> %x1 to <8 x i32>
|
||||
ret <8 x i32> %y
|
||||
}
|
Loading…
Reference in New Issue