diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 08061a1bcc51..e8d47c62b317 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34389,6 +34389,29 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
   if (VT != MVT::f32 && VT != MVT::f64)
     return SDValue();
 
+  // Vector FP selects don't fit the pattern of FP math ops (because the
+  // condition has a different type and we have to change the opcode), so deal
+  // with those here.
+  // FIXME: This is restricted to setcc conditions with i1 elements. The
+  // extracted condition element is passed to the scalar select unchanged, so
+  // handling wider mask elements would first require converting the vector
+  // bool to a scalar bool.
+  if (Vec.getOpcode() == ISD::VSELECT &&
+      Vec.getOperand(0).getOpcode() == ISD::SETCC &&
+      Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
+      Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
+    // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
+    SDLoc DL(ExtElt);
+    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                               Vec.getOperand(0).getValueType().getScalarType(),
+                               Vec.getOperand(0), Index);
+    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                               Vec.getOperand(1), Index);
+    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                               Vec.getOperand(2), Index);
+    return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+  }
+
   // TODO: This switch could include FNEG and the x86-specific FP logic ops
   // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
   // missed load folding and fma+fneg combining.
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll index 206348eb2081..d77671c518d4 100644 --- a/llvm/test/CodeGen/X86/extractelement-fp.ll +++ b/llvm/test/CodeGen/X86/extractelement-fp.ll @@ -155,7 +155,7 @@ define i1 @fcmp_v4f64(<4 x double> %x, <4 x double> %y) nounwind { define float @select_fcmp_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) nounwind { ; CHECK-LABEL: select_fcmp_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcmpneq_oqps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vcmpneq_oqss %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 ; CHECK-NEXT: retq %c = fcmp one <4 x float> %x, %y @@ -167,9 +167,8 @@ define float @select_fcmp_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, define double @select_fcmp_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z, <4 x double> %w) nounwind { ; CHECK-LABEL: select_fcmp_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vcmpnltpd %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vblendvpd %ymm0, %ymm2, %ymm3, %ymm0 -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %c = fcmp ule <4 x double> %x, %y