forked from OSchip/llvm-project
[X86] Use ISD::SIGN_EXTEND instead of X86ISD::VSEXT for mask to xmm/ymm/zmm conversion
There are a couple of tricky things with this patch. I had to add an override of isVectorLoadExtDesirable to stop DAG combine from combining sign_extend with loads after legalization since we legalize sextload using a load+sign_extend. Overriding this hook actually prevents a lot of sextloads from being created in the first place. I also had to add isel patterns because DAG combine blindly combines sign_extend+truncate to a smaller sign_extend which defeats what legalization was trying to do. Differential Revision: https://reviews.llvm.org/D42407 llvm-svn: 323301
This commit is contained in:
parent
fcac3c9642
commit
0321ebc054
|
@ -18652,7 +18652,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
|
|||
MVT WideEltVT = WideVT.getVectorElementType();
|
||||
if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
|
||||
(Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
|
||||
V = getExtendInVec(X86ISD::VSEXT, dl, WideVT, In, DAG);
|
||||
V = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, In);
|
||||
} else {
|
||||
SDValue NegOne = getOnesVector(WideVT, DAG, dl);
|
||||
SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, dl);
|
||||
|
@ -25763,7 +25763,15 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
|
||||
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
|
||||
EVT SrcVT = ExtVal.getOperand(0).getValueType();
|
||||
|
||||
// There is no extending load for vXi1.
|
||||
if (SrcVT.getScalarType() == MVT::i1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
|
|
|
@ -8740,7 +8740,7 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
|
|||
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
|
||||
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
|
||||
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))],
|
||||
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))],
|
||||
IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
|
||||
}
|
||||
|
||||
|
@ -8806,6 +8806,16 @@ defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
|
|||
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
|
||||
avx512vl_i64_info, HasDQI>, VEX_W;
|
||||
|
||||
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
|
||||
// is available, but BWI is not. We can't handle this in lowering because
|
||||
// a target independent DAG combine likes to combine sext and trunc.
|
||||
let Predicates = [HasDQI, NoBWI] in {
|
||||
def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
|
||||
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
|
||||
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
|
||||
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - COMPRESS and EXPAND
|
||||
//
|
||||
|
|
Loading…
Reference in New Issue