[X86] Add ISD node for masked version of CVTPS2PH.
The 128-bit input produces 64 bits of output and fills the upper 64 bits with 0. The mask only applies to the lower elements, but we can't represent this with a vselect like we normally do. Using a dedicated node also avoids the need for a special X86ISD::SELECT when avx512bw isn't enabled, since vselect on v8i16 isn't legal there.

Fixes another instruction for PR34877.

llvm-svn: 350994
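For reference, here is a minimal usage sketch of the masked conversion this patch lowers. It is not part of the patch; it assumes the usual immintrin.h intrinsic names (_mm_mask_cvtps_ph / _mm_maskz_cvtps_ph, AVX512F+AVX512VL, compiled with e.g. -mavx512vl):

    #include <immintrin.h>

    // The 128-bit vcvtps2ph converts 4 floats to 4 half-precision values in
    // the low 64 bits of the xmm destination and zeroes the upper 64 bits,
    // so only the low 4 mask bits participate.
    __m128i cvt_mask(__m128i passthru, __mmask8 m, __m128 a) {
      // Lanes whose mask bit is 0 keep their value from passthru.
      return _mm_mask_cvtps_ph(passthru, m, a, _MM_FROUND_TO_NEAREST_INT);
    }

    __m128i cvt_maskz(__mmask8 m, __m128 a) {
      // Zero-masking form: lanes whose mask bit is 0 become 0.
      return _mm_maskz_cvtps_ph(m, a, _MM_FROUND_TO_NEAREST_INT);
    }

With a non-constant mask, calls like these lower through the new X86ISD::MCVTPS2PH node and select the EVEX-masked vcvtps2ph forms (the rrk/rrkz patterns below) rather than a separate select.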
parent 61aa940074
commit 33b2cf50e3
@@ -21383,12 +21383,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
   case X86ISD::VPSHUFBITQMB:
   case X86ISD::VFPCLASS:
     return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
-  case X86ISD::CVTPS2PH:
-    // We can't use ISD::VSELECT here because it is not always "Legal"
-    // for the destination type. For example vpmovqb require only AVX512
-    // and vselect that can operate on byte element type require BWI
-    OpcodeSelect = X86ISD::SELECT;
-    break;
   }
   if (PreservedSrc.isUndef())
     PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
@@ -22068,9 +22062,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue Mask = Op.getOperand(3);
 
       if (isAllOnesConstant(Mask))
-        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl,
-                                                Op.getValueType(), Src),
-                                    Mask, PassThru, Subtarget, DAG);
+        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
 
       MVT SrcVT = Src.getSimpleValueType();
       MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
@@ -22078,6 +22070,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
                          Mask);
     }
+    case CVTPS2PH_MASK: {
+      SDValue Src = Op.getOperand(1);
+      SDValue Rnd = Op.getOperand(2);
+      SDValue PassThru = Op.getOperand(3);
+      SDValue Mask = Op.getOperand(4);
+
+      if (isAllOnesConstant(Mask))
+        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd);
+
+      MVT SrcVT = Src.getSimpleValueType();
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+      Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+      return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd,
+                         PassThru, Mask);
+
+    }
     default:
       break;
     }
@@ -27365,6 +27373,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::SCALAR_SINT_TO_FP_RND:  return "X86ISD::SCALAR_SINT_TO_FP_RND";
   case X86ISD::SCALAR_UINT_TO_FP_RND:  return "X86ISD::SCALAR_UINT_TO_FP_RND";
   case X86ISD::CVTPS2PH:               return "X86ISD::CVTPS2PH";
+  case X86ISD::MCVTPS2PH:              return "X86ISD::MCVTPS2PH";
   case X86ISD::CVTPH2PS:               return "X86ISD::CVTPH2PS";
   case X86ISD::CVTPH2PS_RND:           return "X86ISD::CVTPH2PS_RND";
   case X86ISD::CVTP2SI:                return "X86ISD::CVTP2SI";
@@ -556,6 +556,10 @@ namespace llvm {
     // Conversions between float and half-float.
     CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
 
+    // Masked version of above.
+    // SRC, RND, PASSTHRU, MASK
+    MCVTPS2PH,
+
     // Galois Field Arithmetic Instructions
     GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 
@@ -8726,12 +8726,28 @@ let Predicates = [HasVLX] in {
 
 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
-  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
-                   (ins _src.RC:$src1, i32u8imm:$src2),
-                   "vcvtps2ph", "$src2, $src1", "$src1, $src2",
-                   (X86cvtps2ph (_src.VT _src.RC:$src1),
-                                (i32 imm:$src2)), 0, 0>,
-                   AVX512AIi8Base, Sched<[RR]>;
+  let ExeDomain = GenericDomain in {
+  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+             (ins _src.RC:$src1, i32u8imm:$src2),
+             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+             [(set _dest.RC:$dst,
+                   (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
+             Sched<[RR]>;
+  let Constraints = "$src0 = $dst" in
+  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+             [(set _dest.RC:$dst,
+                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
+             Sched<[RR]>, EVEX_K;
+  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
+             [(set _dest.RC:$dst,
+                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
+             Sched<[RR]>, EVEX_KZ;
   let hasSideEffects = 0, mayStore = 1 in {
     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
@@ -8743,6 +8759,7 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
   }
+  }
 }
 
 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                                SchedWrite Sched> {
@@ -595,6 +595,13 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
                          SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
                                               SDTCVecEltisVT<1, f32>,
                                               SDTCisVT<2, i32>]> >;
+def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
+                          SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
+                                               SDTCVecEltisVT<1, f32>,
+                                               SDTCisVT<2, i32>,
+                                               SDTCisSameAs<0, 3>,
+                                               SDTCVecEltisVT<4, i1>,
+                                               SDTCisSameNumEltsAs<1, 4>]> >;
 def X86vfpextRnd  : SDNode<"X86ISD::VFPEXT_RND",
                            SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
                                                 SDTCVecEltisVT<1, f32>,
@@ -32,7 +32,7 @@ enum IntrinsicType : uint16_t {
   IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
   COMPRESS_EXPAND_IN_REG,
-  TRUNCATE_TO_REG,
+  TRUNCATE_TO_REG, CVTPS2PH_MASK,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
   FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
   FIXUPIMMS_MASKZ, GATHER_AVX2,
@@ -838,12 +838,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::CVTPH2PS, 0),
   X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
-                     X86ISD::CVTPS2PH, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
-                     X86ISD::CVTPS2PH, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
-                     X86ISD::CVTPS2PH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
+                     X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+  X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
+                     X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+  X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
+                     X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
 
   X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
                      X86ISD::VPSHUFBITQMB, 0),
@@ -990,8 +990,8 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
 ; CHECK-LABEL: test_x86_vcvtps2ph_256:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
 ; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1
 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi)
 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
@@ -4306,21 +4306,21 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
-; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x02]
 ; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
-; X86-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02]
+; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
+; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x02]
+; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_x86_vcvtps2ph_128:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
-; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x02]
 ; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
-; X64-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02]
+; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
+; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x02]
+; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask)
@@ -4337,22 +4337,22 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
-; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x02]
 ; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
-; X86-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02]
+; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
+; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x02]
+; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_x86_vcvtps2ph_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
-; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x02]
 ; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
-; X64-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02]
+; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
+; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x02]
+; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)