diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eca63f80ae0d..9a553d61bfbf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14661,12 +14661,19 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
 }
 
-/// PerformTruncateCombine - Converts truncate operation to
-/// a sequence of vector shuffle operations.
-/// It is possible when we truncate 256-bit vector to 128-bit vector
+/// PerformTruncateCombine - Combine hook for "truncate" operations.
+/// No rewrite is performed; an empty SDValue is always returned, which
+/// signals that no replacement node was produced.
+///
+/// NOTE: a vector (setcc -> truncate) must not be folded into
+/// bitcast(extract_subvector(setcc, 0)). Truncate narrows each lane and
+/// keeps all of them, while the low half of the compare only holds lanes
+/// 0..N/2-1. With all-ones/all-zero masks m0..m7, v8i32 -> v8i16 would give
+/// <m0,m0,m1,m1,m2,m2,m3,m3> instead of <m0,...,m7>, losing lanes 4..7.
+/// The fold also has matching bit widths only for an exact 2:1 narrowing.
 static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget)  {
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/avx-trunc.ll b/llvm/test/CodeGen/X86/avx-trunc.ll
index d0077366444d..aa186a05f217 100755
--- a/llvm/test/CodeGen/X86/avx-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx-trunc.ll
@@ -13,3 +13,17 @@ define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
   ret <8 x i16>%B
 }
 
+; Every one of the eight compare lanes feeds the result, so the checks below
+; deliberately do not forbid extracting the upper 128 bits of the compares.
+define <8 x i16> @trunc_after_setcc(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
+; CHECK: trunc_after_setcc
+; CHECK: vcmpltps
+; CHECK: vcmpltps
+; CHECK: ret
+  %res1 = fcmp olt <8 x float> %a, %b
+  %res2 = fcmp olt <8 x float> %c, %d
+  %andr = and <8 x i1>%res1, %res2
+  %ex = zext <8 x i1> %andr to <8 x i16>
+  ret <8 x i16>%ex
+}
+