forked from OSchip/llvm-project
[x86] allow FP-logic ops when one operand is FP and result is FP
This saves a move between register files. If there is any CPU where the FP logic is slower, we could transform this back to int-logic in MachineCombiner. This helps, but doesn't solve, PR6137 (https://llvm.org/bugs/show_bug.cgi?id=6137). The 'andn' test shows that we're still missing a pattern match that recognizes an xor with a -1 constant as a 'not' op. llvm-svn: 287171
This commit is contained in:
parent
f33f91af24
commit
066139a3ec
|
@ -26971,11 +26971,10 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
// Convert a bitcasted integer logic operation that has one bitcasted
|
||||
// floating-point operand and one constant operand into a floating-point
|
||||
// logic operation. This may create a load of the constant, but that is
|
||||
// cheaper than materializing the constant in an integer register and
|
||||
// transferring it to an SSE register or transferring the SSE operand to
|
||||
// integer register and back.
|
||||
// floating-point operand into a floating-point logic operation. This may
|
||||
// create a load of a constant, but that is cheaper than materializing the
|
||||
// constant in an integer register and transferring it to an SSE register or
|
||||
// transferring the SSE operand to integer register and back.
|
||||
unsigned FPOpcode;
|
||||
switch (N0.getOpcode()) {
|
||||
case ISD::AND: FPOpcode = X86ISD::FAND; break;
|
||||
|
@ -26983,20 +26982,33 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
|||
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
|
||||
default: return SDValue();
|
||||
}
|
||||
if (((Subtarget.hasSSE1() && VT == MVT::f32) ||
|
||||
(Subtarget.hasSSE2() && VT == MVT::f64)) &&
|
||||
isa<ConstantSDNode>(N0.getOperand(1)) &&
|
||||
N0.getOperand(0).getOpcode() == ISD::BITCAST &&
|
||||
N0.getOperand(0).getOperand(0).getValueType() == VT) {
|
||||
SDValue N000 = N0.getOperand(0).getOperand(0);
|
||||
SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
|
||||
return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
|
||||
|
||||
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
|
||||
(Subtarget.hasSSE2() && VT == MVT::f64)))
|
||||
return SDValue();
|
||||
|
||||
SDValue LogicOp0 = N0.getOperand(0);
|
||||
SDValue LogicOp1 = N0.getOperand(1);
|
||||
SDLoc DL0(N0);
|
||||
|
||||
// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
|
||||
if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
|
||||
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
|
||||
!isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
|
||||
SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
|
||||
return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
|
||||
}
|
||||
// bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
|
||||
if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
|
||||
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
|
||||
!isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
|
||||
SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
|
||||
return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
||||
// Match a binop + shuffle pyramid that represents a horizontal reduction over
|
||||
// the elements of a vector.
|
||||
// Returns the vector that is being reduced on, or SDValue() if a reduction
|
||||
|
|
|
@ -29,20 +29,16 @@ define double @FsANDPSrr(double %x, double %y) {
|
|||
define double @FsANDNPSrr(double %x, double %y) {
|
||||
; SSE-LABEL: FsANDNPSrr:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd %xmm0, %rax
|
||||
; SSE-NEXT: movd %xmm1, %rcx
|
||||
; SSE-NEXT: notq %rcx
|
||||
; SSE-NEXT: andq %rax, %rcx
|
||||
; SSE-NEXT: movd %rcx, %xmm0
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; SSE-NEXT: xorpd %xmm1, %xmm2
|
||||
; SSE-NEXT: andpd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: FsANDNPSrr:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq %xmm0, %rax
|
||||
; AVX-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX-NEXT: notq %rcx
|
||||
; AVX-NEXT: andq %rax, %rcx
|
||||
; AVX-NEXT: vmovq %rcx, %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX-NEXT: vxorpd %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
%bc1 = bitcast double %x to i64
|
||||
|
|
|
@ -3,13 +3,9 @@
|
|||
|
||||
; PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428
|
||||
; f1, f2, f3, and f4 should use an integer logic instruction.
|
||||
; f9 and f10 should use an FP (SSE) logic instruction.
|
||||
; f5, f6, f9, and f10 should use an FP (SSE) logic instruction.
|
||||
;
|
||||
; f5, f6, f7, and f8 are less clear.
|
||||
;
|
||||
; For f5 and f6, we can save a register move by using an FP logic instruction,
|
||||
; but we may need to calculate the relative costs of an SSE op vs. int op vs.
|
||||
; scalar <-> SSE register moves.
|
||||
; f7 and f8 are less clear.
|
||||
;
|
||||
; For f7 and f8, the SSE instructions don't take immediate operands, so if we
|
||||
; use one of those, we either have to load a constant from memory or move the
|
||||
|
@ -79,9 +75,8 @@ define i32 @f4(float %x) {
|
|||
define float @f5(float %x, i32 %y) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: andl %edi, %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: movd %edi, %xmm1
|
||||
; CHECK-NEXT: andps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%bc1 = bitcast float %x to i32
|
||||
|
@ -95,9 +90,8 @@ define float @f5(float %x, i32 %y) {
|
|||
define float @f6(float %x, i32 %y) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: andl %edi, %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: movd %edi, %xmm1
|
||||
; CHECK-NEXT: andps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%bc1 = bitcast float %x to i32
|
||||
|
|
Loading…
Reference in New Issue