LegalizeDAG: Fix and improve FCOPYSIGN/FABS legalization

- Factor out code to query and modify the sign bit of a floating-point
  value as an integer. This also works if none of the target's integer
  types is big enough to hold all bits of the floating-point value.

- Legalize FABS(x) as FCOPYSIGN(x, 0.0) if FCOPYSIGN is available,
  otherwise perform bit manipulation on the sign bit. The previous code
  used "x >u 0 ? x : -x" which is incorrect for x being -0.0! It also
  takes 34 instructions on ARM Cortex-M4. With this patch we only
  require 5:
    vldr d0, LCPI0_0
    vmov r2, r3, d0
    lsrs r2, r3, #31
    bfi r1, r2, #31, #1
    bx lr
  (This could be further improved if the compiler recognized that
   r2 and r3 are zero).

- Only lower FCOPYSIGN(x, y) = sign(y) ? -FABS(x) : FABS(x) if FABS and
  FNEG are available; otherwise perform bit manipulation on the sign bit.

- Perform the sign(x) test by masking out the sign bit and comparing
  with 0 rather than shifting the sign bit to the highest position and
  testing for "<s 0". For x86 copysignl (on 80bit values) this gets us:
    testl $32768, %eax
  rather than:
    shlq $48, %rax
    sets %al
    testb %al, %al

Differential Revision: http://reviews.llvm.org/D11172

llvm-svn: 252839
This commit is contained in:
Matthias Braun 2015-11-12 01:02:47 +00:00
parent dc3135db05
commit b9610a6bc2
3 changed files with 152 additions and 79 deletions

View File

@ -39,6 +39,10 @@ using namespace llvm;
#define DEBUG_TYPE "legalizedag" #define DEBUG_TYPE "legalizedag"
namespace {
struct FloatSignAsInt;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and /// This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves /// hacks on it until the target machine can handle it. This involves
@ -51,7 +55,6 @@ using namespace llvm;
/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
/// will attempt merge setcc and brc instructions into brcc's. /// will attempt merge setcc and brc instructions into brcc's.
/// ///
namespace {
class SelectionDAGLegalize { class SelectionDAGLegalize {
const TargetMachine &TM; const TargetMachine &TM;
const TargetLowering &TLI; const TargetLowering &TLI;
@ -130,7 +133,11 @@ private:
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node, void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results); SmallVectorImpl<SDValue> &Results);
SDValue ExpandFCOPYSIGN(SDNode *Node); void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
SDLoc dl); SDLoc dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
@ -1585,69 +1592,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
false, false, false, 0); false, false, false, 0);
} }
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { namespace {
SDLoc dl(Node); /// Keeps track of state when getting the sign of a floating-point value as an
SDValue Tmp1 = Node->getOperand(0); /// integer.
SDValue Tmp2 = Node->getOperand(1); struct FloatSignAsInt {
EVT FloatVT;
SDValue Chain;
SDValue FloatPtr;
SDValue IntPtr;
MachinePointerInfo IntPointerInfo;
MachinePointerInfo FloatPointerInfo;
SDValue IntValue;
APInt SignMask;
};
}
// Get the sign bit of the RHS. First obtain a value that has the same /// Bitcast a floating-point value to an integer value. Only bitcast the part
// sign as the sign bit, i.e. negative if and only if the sign bit is 1. /// containing the sign bit if the target has no integer value capable of
SDValue SignBit; /// holding all bits of the floating-point value.
EVT FloatVT = Tmp2.getValueType(); void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); SDLoc DL, SDValue Value) const {
EVT FloatVT = Value.getValueType();
unsigned NumBits = FloatVT.getSizeInBits();
State.FloatVT = FloatVT;
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
// Convert to an integer of the same size.
if (TLI.isTypeLegal(IVT)) { if (TLI.isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit. State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); State.SignMask = APInt::getSignBit(NumBits);
} else { return;
auto &DL = DAG.getDataLayout(); }
auto &DataLayout = DAG.getDataLayout();
// Store the float to memory, then load the sign part out as an integer. // Store the float to memory, then load the sign part out as an integer.
MVT LoadTy = TLI.getPointerTy(DL); MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
// First create a temporary that is aligned for both the load and store. // First create a temporary that is aligned for both the load and store.
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Then store the float to it. // Then store the float to it.
SDValue Ch = State.FloatPtr = StackPtr;
DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), MachineFunction &MF = DAG.getMachineFunction();
false, false, 0); State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
if (DL.isBigEndian()) { State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
State.FloatPointerInfo, false, false, 0);
SDValue IntPtr;
if (DataLayout.isBigEndian()) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!"); assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float. // Load out a legal integer with the same sign bit as the float.
SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), IntPtr = StackPtr;
false, false, false, 0); State.IntPointerInfo = State.FloatPointerInfo;
} else { // Little endian } else {
SDValue LoadPtr = StackPtr; // Advance the pointer so that the loaded byte will contain the sign bit.
// The float may be wider than the integer we are going to load. Advance unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
// the pointer so that the loaded integer will contain the sign bit. IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, ByteOffset);
DAG.getConstant(ByteOffset, dl,
LoadPtr.getValueType()));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
false, false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
if (BitShift)
SignBit = DAG.getNode(
ISD::SHL, dl, LoadTy, SignBit,
DAG.getConstant(BitShift, dl,
TLI.getShiftAmountTy(SignBit.getValueType(), DL)));
} }
State.IntPtr = IntPtr;
State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain,
IntPtr, State.IntPointerInfo, MVT::i8,
false, false, false, 0);
State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
} }
// Now get the sign bit proper, by seeing whether the value is negative.
SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), /// Replace the integer value produced by getSignAsIntValue() with a new value
SignBit, /// and cast the result back to a floating-point type.
DAG.getConstant(0, dl, SignBit.getValueType()), SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
ISD::SETLT); SDLoc DL, SDValue NewIntValue) const {
// Get the absolute value of the result. if (!State.Chain)
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
// Select between the nabs and abs value based on the sign bit of
// the input. // Override the part containing the sign bit in the value stored on the stack.
return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), State.IntPointerInfo, MVT::i8, false, false,
AbsVal); 0);
return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
State.FloatPointerInfo, false, false, false, 0);
}
/// Expand FCOPYSIGN(Mag, Sign): build a value that has the magnitude of
/// operand 0 and the sign of operand 1.
///
/// The sign operand is always examined as an integer (see
/// getSignAsIntValue()). If FABS and FNEG are legal or custom for the
/// magnitude type, the result is a select between -FABS(Mag) and FABS(Mag).
/// Otherwise the sign bit is spliced into the integer image of Mag and the
/// result is converted back to floating point with modifySignAsInt().
///
/// FCOPYSIGN does not require both operands to have the same floating-point
/// type, so the integer that carries the sign bit may differ in width and in
/// sign-bit position from the integer that carries the magnitude. The sign bit
/// is therefore moved to the magnitude's position before it is ORed in.
///
/// \param Node the FCOPYSIGN node to expand.
/// \returns the replacement value, of the magnitude operand's type.
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
  SDLoc DL(Node);
  SDValue Mag = Node->getOperand(0);
  SDValue Sign = Node->getOperand(1);

  // Get sign bit into an integer value.
  FloatSignAsInt SignAsInt;
  getSignAsIntValue(SignAsInt, DL, Sign);

  EVT IntVT = SignAsInt.IntValue.getValueType();
  SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
  SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
                                SignMask);

  // If FABS and FNEG are available, transform
  //   FCOPYSIGN(x, y) => sign(y) ? -FABS(x) : FABS(x)
  EVT FloatVT = Mag.getValueType();
  if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
      TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
    SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
    SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
    SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
                                DAG.getConstant(0, DL, IntVT), ISD::SETNE);
    return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
  }

  // Transform the magnitude to an integer and clear its sign bit. Use the
  // magnitude's own integer type and mask: they need not match the sign's.
  FloatSignAsInt MagAsInt;
  getSignAsIntValue(MagAsInt, DL, Mag);
  EVT MagVT = MagAsInt.IntValue.getValueType();
  SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT);
  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue,
                                    ClearSignMask);

  // Shift V (an integer of type VT) so that bit FromBit lands on bit ToBit.
  auto MoveBit = [&](SDValue V, EVT VT, unsigned FromBit,
                     unsigned ToBit) -> SDValue {
    if (FromBit == ToBit)
      return V;
    const bool ShiftRight = FromBit > ToBit;
    const unsigned Amount = ShiftRight ? FromBit - ToBit : ToBit - FromBit;
    SDValue AmountCst = DAG.getConstant(
        Amount, DL, TLI.getShiftAmountTy(VT, DAG.getDataLayout()));
    return DAG.getNode(ShiftRight ? ISD::SRL : ISD::SHL, DL, VT, V, AmountCst);
  };

  // Both masks have exactly one bit set, so logBase2() is that bit's index.
  const unsigned SignBitPos = SignAsInt.SignMask.logBase2();
  const unsigned MagBitPos = MagAsInt.SignMask.logBase2();
  if (IntVT.getSizeInBits() > MagVT.getSizeInBits()) {
    // Narrowing: position the bit while still in the wide type, otherwise the
    // truncation would discard it.
    SignBit = MoveBit(SignBit, IntVT, SignBitPos, MagBitPos);
    SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
  } else {
    // Same width or widening: extend first (if needed), then position the bit.
    if (IntVT.getSizeInBits() < MagVT.getSizeInBits())
      SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
    SignBit = MoveBit(SignBit, MagVT, SignBitPos, MagBitPos);
  }

  // Merge the sign into the magnitude and convert back to floating point.
  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
  return modifySignAsInt(MagAsInt, DL, CopiedSign);
}
/// Expand an FABS node into operations that clear the sign bit of its operand.
///
/// \param Node the node being expanded; operand 0 is the floating-point input.
/// \returns FCOPYSIGN(input, 0.0) when FCOPYSIGN is legal or custom for the
/// input type; otherwise the value rebuilt by modifySignAsInt() after the
/// sign bit has been cleared in the integer obtained from getSignAsIntValue().
SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
SDLoc DL(Node);
SDValue Value = Node->getOperand(0);
// Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal or custom.
EVT FloatVT = Value.getValueType();
if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
// The sign is taken from a 0.0 constant, so the result keeps the magnitude
// of Value with its sign bit cleared.
SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
}
// Transform value to integer, clear the sign bit and transform back.
FloatSignAsInt ValueAsInt;
getSignAsIntValue(ValueAsInt, DL, Value);
EVT IntVT = ValueAsInt.IntValue.getValueType();
// ANDing with the complement of SignMask clears only the sign bit.
SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
ClearSignMask);
// Convert the modified integer back to the original floating-point type.
return modifySignAsInt(ValueAsInt, DL, ClearedSign);
} }
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@ -3196,18 +3277,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0)); Node->getOperand(0));
Results.push_back(Tmp1); Results.push_back(Tmp1);
break; break;
case ISD::FABS: { case ISD::FABS:
// Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). Results.push_back(ExpandFABS(Node));
EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = DAG.getConstantFP(0.0, dl, VT);
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
Tmp1, Tmp2, ISD::SETUGT);
Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3);
Results.push_back(Tmp1);
break; break;
}
case ISD::SMIN: case ISD::SMIN:
case ISD::SMAX: case ISD::SMAX:
case ISD::UMIN: case ISD::UMIN:

View File

@ -109,8 +109,12 @@ declare double @llvm.fabs.f64(double %Val)
define double @abs_d(double %a) { define double @abs_d(double %a) {
; CHECK-LABEL: abs_d: ; CHECK-LABEL: abs_d:
; NONE: bic r1, r1, #-2147483648 ; NONE: bic r1, r1, #-2147483648
; SP: bl __aeabi_dsub ; SP: vldr d1, .LCPI{{.*}}
; SP: bl __aeabi_dcmple ; SP: vmov r0, r1, d0
; SP: vmov r2, r3, d1
; SP: lsrs r2, r3, #31
; SP: bfi r1, r2, #31, #1
; SP: vmov d0, r0, r1
; DP: vabs.f64 d0, d0 ; DP: vabs.f64 d0, d0
%1 = call double @llvm.fabs.f64(double %a) %1 = call double @llvm.fabs.f64(double %a)
ret double %1 ret double %1

View File

@ -6,10 +6,7 @@
; CHECK-NEXT: .long 2139095040 ; CHECK-NEXT: .long 2139095040
; CHECK-LABEL: foo: ; CHECK-LABEL: foo:
; CHECK: movq {{.*}}, %rax ; CHECK: testb $-128, -15(%rsp)
; CHECK: shlq $48, %rax
; CHECK: sets %al
; CHECK: testb %al, %al
; CHECK: flds LCPI0_0(%rip) ; CHECK: flds LCPI0_0(%rip)
; CHECK: flds LCPI0_1(%rip) ; CHECK: flds LCPI0_1(%rip)
; CHECK: fcmovne %st(1), %st(0) ; CHECK: fcmovne %st(1), %st(0)