[x86] allow FP-logic ops when one operand is FP and result is FP

This saves a move between the integer and SSE register files. If there is
any CPU where the FP logic ops are slower, we could transform this back to
integer logic in MachineCombiner.

This helps, but doesn't solve, PR6137:
https://llvm.org/bugs/show_bug.cgi?id=6137

The 'andn' test shows that we're still missing a pattern match that
recognizes an xor with a -1 constant as a 'not' op.

llvm-svn: 287171
This commit is contained in:
Sanjay Patel 2016-11-16 22:34:05 +00:00
parent f33f91af24
commit 066139a3ec
3 changed files with 38 additions and 36 deletions

View File

@ -26971,11 +26971,10 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
}
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand and one constant operand into a floating-point
// logic operation. This may create a load of the constant, but that is
// cheaper than materializing the constant in an integer register and
// transferring it to an SSE register or transferring the SSE operand to
// integer register and back.
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
// constant in an integer register and transferring it to an SSE register or
// transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
case ISD::AND: FPOpcode = X86ISD::FAND; break;
@ -26983,20 +26982,33 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
if (((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64)) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::BITCAST &&
N0.getOperand(0).getOperand(0).getValueType() == VT) {
SDValue N000 = N0.getOperand(0).getOperand(0);
SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64)))
return SDValue();
SDValue LogicOp0 = N0.getOperand(0);
SDValue LogicOp1 = N0.getOperand(1);
SDLoc DL0(N0);
// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
}
// bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();
}
// Match a binop + shuffle pyramid that represents a horizontal reduction over
// the elements of a vector.
// Returns the vector that is being reduced on, or SDValue() if a reduction

View File

@ -29,20 +29,16 @@ define double @FsANDPSrr(double %x, double %y) {
define double @FsANDNPSrr(double %x, double %y) {
; SSE-LABEL: FsANDNPSrr:
; SSE: # BB#0:
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: movd %xmm1, %rcx
; SSE-NEXT: notq %rcx
; SSE-NEXT: andq %rax, %rcx
; SSE-NEXT: movd %rcx, %xmm0
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: xorpd %xmm1, %xmm2
; SSE-NEXT: andpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: FsANDNPSrr:
; AVX: # BB#0:
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: vmovq %xmm1, %rcx
; AVX-NEXT: notq %rcx
; AVX-NEXT: andq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX-NEXT: vxorpd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
%bc1 = bitcast double %x to i64

View File

@ -3,13 +3,9 @@
; PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428
; f1, f2, f3, and f4 should use an integer logic instruction.
; f9 and f10 should use an FP (SSE) logic instruction.
; f5, f6, f9, and f10 should use an FP (SSE) logic instruction.
;
; f5, f6, f7, and f8 are less clear.
;
; For f5 and f6, we can save a register move by using an FP logic instruction,
; but we may need to calculate the relative costs of an SSE op vs. int op vs.
; scalar <-> SSE register moves.
; f7 and f8 are less clear.
;
; For f7 and f8, the SSE instructions don't take immediate operands, so if we
; use one of those, we either have to load a constant from memory or move the
@ -79,9 +75,8 @@ define i32 @f4(float %x) {
define float @f5(float %x, i32 %y) {
; CHECK-LABEL: f5:
; CHECK: # BB#0:
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movd %edi, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast float %x to i32
@ -95,9 +90,8 @@ define float @f5(float %x, i32 %y) {
define float @f6(float %x, i32 %y) {
; CHECK-LABEL: f6:
; CHECK: # BB#0:
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movd %edi, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast float %x to i32