diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9397f8c9cf52..01624a76a8e5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26971,11 +26971,10 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, } // Convert a bitcasted integer logic operation that has one bitcasted - // floating-point operand and one constant operand into a floating-point - // logic operation. This may create a load of the constant, but that is - // cheaper than materializing the constant in an integer register and - // transferring it to an SSE register or transferring the SSE operand to - // integer register and back. + // floating-point operand into a floating-point logic operation. This may + // create a load of a constant, but that is cheaper than materializing the + // constant in an integer register and transferring it to an SSE register or + // transferring the SSE operand to integer register and back. 
unsigned FPOpcode; switch (N0.getOpcode()) { case ISD::AND: FPOpcode = X86ISD::FAND; break; @@ -26983,20 +26982,33 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, case ISD::XOR: FPOpcode = X86ISD::FXOR; break; default: return SDValue(); } - if (((Subtarget.hasSSE1() && VT == MVT::f32) || - (Subtarget.hasSSE2() && VT == MVT::f64)) && - isa<ConstantSDNode>(N0.getOperand(1)) && - N0.getOperand(0).getOpcode() == ISD::BITCAST && - N0.getOperand(0).getOperand(0).getValueType() == VT) { - SDValue N000 = N0.getOperand(0).getOperand(0); - SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1)); - return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst); + + if (!((Subtarget.hasSSE1() && VT == MVT::f32) || + (Subtarget.hasSSE2() && VT == MVT::f64))) + return SDValue(); + + SDValue LogicOp0 = N0.getOperand(0); + SDValue LogicOp1 = N0.getOperand(1); + SDLoc DL0(N0); + + // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y)) + if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST && + LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT && + !isa<ConstantSDNode>(LogicOp0.getOperand(0))) { + SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1); + return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1); + } + // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y) + if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST && + LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT && + !isa<ConstantSDNode>(LogicOp1.getOperand(0))) { + SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0); + return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0); } return SDValue(); } - // Match a binop + shuffle pyramid that represents a horizontal reduction over // the elements of a vector. 
// Returns the vector that is being reduced on, or SDValue() if a reduction diff --git a/llvm/test/CodeGen/X86/fp-logic-replace.ll b/llvm/test/CodeGen/X86/fp-logic-replace.ll index 47e076887028..50e2c1b2029b 100644 --- a/llvm/test/CodeGen/X86/fp-logic-replace.ll +++ b/llvm/test/CodeGen/X86/fp-logic-replace.ll @@ -29,20 +29,16 @@ define double @FsANDPSrr(double %x, double %y) { define double @FsANDNPSrr(double %x, double %y) { ; SSE-LABEL: FsANDNPSrr: ; SSE: # BB#0: -; SSE-NEXT: movd %xmm0, %rax -; SSE-NEXT: movd %xmm1, %rcx -; SSE-NEXT: notq %rcx -; SSE-NEXT: andq %rax, %rcx -; SSE-NEXT: movd %rcx, %xmm0 +; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; SSE-NEXT: xorpd %xmm1, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: FsANDNPSrr: ; AVX: # BB#0: -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vmovq %xmm1, %rcx -; AVX-NEXT: notq %rcx -; AVX-NEXT: andq %rax, %rcx -; AVX-NEXT: vmovq %rcx, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vxorpd %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; %bc1 = bitcast double %x to i64 diff --git a/llvm/test/CodeGen/X86/fp-logic.ll b/llvm/test/CodeGen/X86/fp-logic.ll index 2c6698fb1207..301fa8f41377 100644 --- a/llvm/test/CodeGen/X86/fp-logic.ll +++ b/llvm/test/CodeGen/X86/fp-logic.ll @@ -3,13 +3,9 @@ ; PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428 ; f1, f2, f3, and f4 should use an integer logic instruction. -; f9 and f10 should use an FP (SSE) logic instruction. +; f5, f6, f9, and f10 should use an FP (SSE) logic instruction. ; -; f5, f6, f7, and f8 are less clear. -; -; For f5 and f6, we can save a register move by using an FP logic instruction, -; but we may need to calculate the relative costs of an SSE op vs. int op vs. -; scalar <-> SSE register moves. +; f7 and f8 are less clear. 
; ; For f7 and f8, the SSE instructions don't take immediate operands, so if we ; use one of those, we either have to load a constant from memory or move the @@ -79,9 +75,8 @@ define i32 @f4(float %x) { define float @f5(float %x, i32 %y) { ; CHECK-LABEL: f5: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: andl %edi, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movd %edi, %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %bc1 = bitcast float %x to i32 @@ -95,9 +90,8 @@ define float @f5(float %x, i32 %y) { define float @f6(float %x, i32 %y) { ; CHECK-LABEL: f6: ; CHECK: # BB#0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: andl %edi, %eax -; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movd %edi, %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq ; %bc1 = bitcast float %x to i32