diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e82a79fa68a8..438940de264d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35455,6 +35455,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, N1.getScalarValueSizeInBits() == SrcBitsPerElt && "Unexpected PACKSS/PACKUS input type"); + bool IsSigned = (X86ISD::PACKSS == Opcode); + // Constant Folding. APInt UndefElts0, UndefElts1; SmallVector EltBits0, EltBits1; @@ -35467,7 +35469,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, unsigned NumSrcElts = NumDstElts / 2; unsigned NumDstEltsPerLane = NumDstElts / NumLanes; unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes; - bool IsSigned = (X86ISD::PACKSS == Opcode); APInt Undefs(NumDstElts, 0); SmallVector Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt)); @@ -35511,15 +35512,14 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); } - // Try to combine a PACKUSWB implemented truncate with a regular truncate to - // create a larger truncate. - // TODO: Match PACKSSWB as well? - if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS && + // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular + // truncate to create a larger truncate. + if (Subtarget.hasAVX512() && N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && N0.getOperand(0).getValueType() == MVT::v8i32) { - - APInt ZeroMask = APInt::getHighBitsSet(16, 8); - if (DAG.MaskedValueIsZero(N0, ZeroMask)) { + if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) || + (!IsSigned && + DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) { if (Subtarget.hasVLX()) return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0)); diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll index af4b73fc79d9..849a814e640d 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll @@ -648,16 +648,14 @@ define <8 x i8> @f64to8sc(<8 x double> %f) { ; NOVL-LABEL: f64to8sc: ; NOVL: # %bb.0: ; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; NOVL-NEXT: vpmovdw %zmm0, %ymm0 -; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; NOVL-NEXT: vpmovdb %zmm0, %xmm0 ; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: f64to8sc: ; VL: # %bb.0: ; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; VL-NEXT: vpmovdw %ymm0, %xmm0 -; VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; VL-NEXT: vpmovdb %ymm0, %xmm0 ; VL-NEXT: vzeroupper ; VL-NEXT: retq %res = fptosi <8 x double> %f to <8 x i8>