forked from OSchip/llvm-project
Lower FNEG ( FABS (x) ) -> FNABS (x) [X86 codegen] PR20578
A negated FABS of either a scalar or a vector should be handled the same way on x86 with SSE/AVX: a single OR instruction that combines the FP operand with a constant mask to set the sign bit(s). http://llvm.org/bugs/show_bug.cgi?id=20578 Differential Revision: http://reviews.llvm.org/D5201 llvm-svn: 218822
This commit is contained in:
parent
7fad1b4374
commit
9ebfbb969d
|
@ -13678,12 +13678,24 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
|
||||||
In, DAG.getUNDEF(SVT)));
|
In, DAG.getUNDEF(SVT)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// The only differences between FABS and FNEG are the mask and the logic op.
|
/// The only differences between FABS and FNEG are the mask and the logic op.
|
||||||
|
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
|
||||||
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
||||||
assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
|
assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
|
||||||
"Wrong opcode for lowering FABS or FNEG.");
|
"Wrong opcode for lowering FABS or FNEG.");
|
||||||
|
|
||||||
bool IsFABS = (Op.getOpcode() == ISD::FABS);
|
bool IsFABS = (Op.getOpcode() == ISD::FABS);
|
||||||
|
|
||||||
|
// If this is a FABS and it has an FNEG user, bail out to fold the combination
|
||||||
|
// into an FNABS. We'll lower the FABS after that if it is still in use.
|
||||||
|
if (IsFABS)
|
||||||
|
for (SDNode *User : Op->uses())
|
||||||
|
if (User->getOpcode() == ISD::FNEG)
|
||||||
|
return Op;
|
||||||
|
|
||||||
|
SDValue Op0 = Op.getOperand(0);
|
||||||
|
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
|
||||||
|
|
||||||
SDLoc dl(Op);
|
SDLoc dl(Op);
|
||||||
MVT VT = Op.getSimpleValueType();
|
MVT VT = Op.getSimpleValueType();
|
||||||
// Assume scalar op for initialization; update for vector if needed.
|
// Assume scalar op for initialization; update for vector if needed.
|
||||||
|
@ -13719,15 +13731,19 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
|
||||||
// For a vector, cast operands to a vector type, perform the logic op,
|
// For a vector, cast operands to a vector type, perform the logic op,
|
||||||
// and cast the result back to the original value type.
|
// and cast the result back to the original value type.
|
||||||
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
|
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
|
||||||
SDValue Op0Casted = DAG.getNode(ISD::BITCAST, dl, VecVT, Op.getOperand(0));
|
|
||||||
SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
|
SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
|
||||||
unsigned LogicOp = IsFABS ? ISD::AND : ISD::XOR;
|
SDValue Operand = IsFNABS ?
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) :
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, VecVT, Op0);
|
||||||
|
unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
|
||||||
return DAG.getNode(ISD::BITCAST, dl, VT,
|
return DAG.getNode(ISD::BITCAST, dl, VT,
|
||||||
DAG.getNode(LogicOp, dl, VecVT, Op0Casted, MaskCasted));
|
DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If not vector, then scalar.
|
// If not vector, then scalar.
|
||||||
unsigned LogicOp = IsFABS ? X86ISD::FAND : X86ISD::FXOR;
|
unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
|
||||||
return DAG.getNode(LogicOp, dl, VT, Op.getOperand(0), Mask);
|
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
|
||||||
|
return DAG.getNode(BitOp, dl, VT, Operand, Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx| FileCheck %s
|
||||||
|
|
||||||
|
; Verify that we generate a single OR instruction for a scalar, vec128, and vec256
|
||||||
|
; FNABS(x) operation, i.e. FNEG(FABS(x)).
|
||||||
|
; If the FABS() result has no users other than the FNEG, the AND instruction should be eliminated.
|
||||||
|
; PR20578: http://llvm.org/bugs/show_bug.cgi?id=20578
|
||||||
|
|
||||||
|
define float @scalar_no_abs(float %a) {
|
||||||
|
; CHECK-LABEL: scalar_no_abs:
|
||||||
|
; CHECK: vorps
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%fabs = tail call float @fabsf(float %a) #1
|
||||||
|
%fsub = fsub float -0.0, %fabs
|
||||||
|
ret float %fsub
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @scalar_uses_abs(float %a) {
|
||||||
|
; CHECK-LABEL: scalar_uses_abs:
|
||||||
|
; CHECK-DAG: vandps
|
||||||
|
; CHECK-DAG: vorps
|
||||||
|
; CHECK: vmulss
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%fabs = tail call float @fabsf(float %a) #1
|
||||||
|
%fsub = fsub float -0.0, %fabs
|
||||||
|
%fmul = fmul float %fsub, %fabs
|
||||||
|
ret float %fmul
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @vector128_no_abs(<4 x float> %a) {
|
||||||
|
; CHECK-LABEL: vector128_no_abs:
|
||||||
|
; CHECK: vorps
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%fabs = tail call <4 x float> @llvm.fabs.v4f32(< 4 x float> %a) #1
|
||||||
|
%fsub = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
|
||||||
|
ret <4 x float> %fsub
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @vector128_uses_abs(<4 x float> %a) {
|
||||||
|
; CHECK-LABEL: vector128_uses_abs:
|
||||||
|
; CHECK-DAG: vandps
|
||||||
|
; CHECK-DAG: vorps
|
||||||
|
; CHECK: vmulps
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%fabs = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #1
|
||||||
|
%fsub = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
|
||||||
|
%fmul = fmul <4 x float> %fsub, %fabs
|
||||||
|
ret <4 x float> %fmul
|
||||||
|
}
|
||||||
|
|
||||||
|
define <8 x float> @vector256_no_abs(<8 x float> %a) {
|
||||||
|
; CHECK-LABEL: vector256_no_abs:
|
||||||
|
; CHECK: vorps
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%fabs = tail call <8 x float> @llvm.fabs.v8f32(< 8 x float> %a) #1
|
||||||
|
%fsub = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
|
||||||
|
ret <8 x float> %fsub
|
||||||
|
}
|
||||||
|
|
||||||
|
define <8 x float> @vector256_uses_abs(<8 x float> %a) {
|
||||||
|
; CHECK-LABEL: vector256_uses_abs:
|
||||||
|
; CHECK-DAG: vandps
|
||||||
|
; CHECK-DAG: vorps
|
||||||
|
; CHECK: vmulps
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%fabs = tail call <8 x float> @llvm.fabs.v8f32(<8 x float> %a) #1
|
||||||
|
%fsub = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
|
||||||
|
%fmul = fmul <8 x float> %fsub, %fabs
|
||||||
|
ret <8 x float> %fmul
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
|
||||||
|
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
|
||||||
|
|
||||||
|
declare float @fabsf(float)
|
||||||
|
|
||||||
|
attributes #1 = { readnone }
|
||||||
|
|
Loading…
Reference in New Issue