forked from OSchip/llvm-project
transform obscured FP sign bit ops into a fabs/fneg using TLI hook
This is effectively a revert of: http://reviews.llvm.org/rL249702 - [InstCombine] transform masking off of an FP sign bit into a fabs() intrinsic call (PR24886) and: http://reviews.llvm.org/rL249701 - [ValueTracking] teach computeKnownBits that a fabs() clears sign bits and a reimplementation as a DAG combine for targets that have IEEE754-compliant fabs/fneg instructions. This is intended to resolve the objections raised on the dev list: http://lists.llvm.org/pipermail/llvm-dev/2016-April/098154.html and: https://llvm.org/bugs/show_bug.cgi?id=24886#c4 In the interest of patch minimalism, I've only partly enabled AArch64. PowerPC, MIPS, x86 and others can enable later. Differential Revision: http://reviews.llvm.org/D19391 llvm-svn: 271573
This commit is contained in:
parent
617409f0c0
commit
dba8b4c04d
|
@ -319,6 +319,14 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Return true if it is safe to transform an integer-domain bitwise operation
|
||||
/// into the equivalent floating-point operation. This should be set to true
|
||||
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
|
||||
/// type.
|
||||
virtual bool hasBitPreservingFPLogic(EVT VT) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Return if the target supports combining a
|
||||
/// chain like:
|
||||
/// \code
|
||||
|
|
|
@ -991,8 +991,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
|
|||
}
|
||||
case Instruction::BitCast: {
|
||||
Type *SrcTy = I->getOperand(0)->getType();
|
||||
if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
|
||||
SrcTy->isFloatingPointTy()) &&
|
||||
if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
|
||||
// TODO: For now, not handling conversions like:
|
||||
// (bitcast i64 %x to <2 x i32>)
|
||||
!I->getType()->isVectorTy()) {
|
||||
|
@ -1316,12 +1315,6 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
|
|||
// of bits which might be set provided by popcnt KnownOne2.
|
||||
break;
|
||||
}
|
||||
case Intrinsic::fabs: {
|
||||
Type *Ty = II->getType();
|
||||
APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
|
||||
KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::x86_sse42_crc32_64_64:
|
||||
KnownZero |= APInt::getHighBitsSet(64, 32);
|
||||
break;
|
||||
|
@ -1381,9 +1374,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
|
|||
unsigned BitWidth = KnownZero.getBitWidth();
|
||||
|
||||
assert((V->getType()->isIntOrIntVectorTy() ||
|
||||
V->getType()->isFPOrFPVectorTy() ||
|
||||
V->getType()->getScalarType()->isPointerTy()) &&
|
||||
"Not integer, floating point, or pointer type!");
|
||||
"Not integer or pointer type!");
|
||||
assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
|
||||
(!V->getType()->isIntOrIntVectorTy() ||
|
||||
V->getType()->getScalarSizeInBits() == BitWidth) &&
|
||||
|
|
|
@ -7351,6 +7351,49 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
|
|||
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
|
||||
}
|
||||
|
||||
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
|
||||
const TargetLowering &TLI) {
|
||||
// If this is not a bitcast to an FP type or if the target doesn't have
|
||||
// IEEE754-compliant FP logic, we're done.
|
||||
EVT VT = N->getValueType(0);
|
||||
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
|
||||
return SDValue();
|
||||
|
||||
// TODO: Use splat values for the constant-checking below and remove this
|
||||
// restriction.
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT SourceVT = N0.getValueType();
|
||||
if (SourceVT.isVector())
|
||||
return SDValue();
|
||||
|
||||
unsigned FPOpcode;
|
||||
APInt SignMask;
|
||||
switch (N0.getOpcode()) {
|
||||
case ISD::AND:
|
||||
FPOpcode = ISD::FABS;
|
||||
SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
|
||||
break;
|
||||
case ISD::XOR:
|
||||
FPOpcode = ISD::FNEG;
|
||||
SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
|
||||
break;
|
||||
// TODO: ISD::OR --> ISD::FNABS?
|
||||
default:
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
|
||||
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
|
||||
SDValue LogicOp0 = N0.getOperand(0);
|
||||
ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
|
||||
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
|
||||
LogicOp0.getOpcode() == ISD::BITCAST &&
|
||||
LogicOp0->getOperand(0).getValueType() == VT)
|
||||
return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -7415,6 +7458,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
|
|||
}
|
||||
}
|
||||
|
||||
if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
|
||||
return V;
|
||||
|
||||
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
|
||||
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
|
||||
//
|
||||
|
|
|
@ -395,6 +395,12 @@ public:
|
|||
bool isCheapToSpeculateCtlz() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool hasBitPreservingFPLogic(EVT VT) const override {
|
||||
// FIXME: Is this always true? It should be true for vectors at least.
|
||||
return VT == MVT::f32 || VT == MVT::f64;
|
||||
}
|
||||
|
||||
bool supportSplitCSR(MachineFunction *MF) const override {
|
||||
return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
|
||||
MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
|
||||
|
|
|
@ -1594,24 +1594,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
|
|||
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
|
||||
return CastedAnd;
|
||||
|
||||
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
|
||||
Value *Op0COp = Op0C->getOperand(0);
|
||||
Type *SrcTy = Op0COp->getType();
|
||||
|
||||
// If we are masking off the sign bit of a floating-point value, convert
|
||||
// this to the canonical fabs intrinsic call and cast back to integer.
|
||||
// The backend should know how to optimize fabs().
|
||||
// TODO: This transform should also apply to vectors.
|
||||
ConstantInt *CI;
|
||||
if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
|
||||
match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
|
||||
Module *M = I.getModule();
|
||||
Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
|
||||
Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
|
||||
return CastInst::CreateBitOrPointerCast(Call, I.getType());
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *Select = foldBoolSextMaskToSelect(I))
|
||||
return Select;
|
||||
|
||||
|
|
|
@ -153,9 +153,7 @@ define double @test_bitcasti64todouble(i64 %in) {
|
|||
define double @bitcast_fabs(double %x) {
|
||||
; CHECK-LABEL: bitcast_fabs:
|
||||
; CHECK: ; BB#0:
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: fabs d0, d0
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
%bc1 = bitcast double %x to i64
|
||||
|
@ -167,9 +165,7 @@ define double @bitcast_fabs(double %x) {
|
|||
define float @bitcast_fneg(float %x) {
|
||||
; CHECK-LABEL: bitcast_fneg:
|
||||
; CHECK: ; BB#0:
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: eor w8, w8, #0x80000000
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: fneg s0, s0
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
%bc1 = bitcast float %x to i32
|
||||
|
|
|
@ -103,45 +103,3 @@ define i64 @test10(i64 %x) {
|
|||
ret i64 %add
|
||||
}
|
||||
|
||||
define i64 @fabs_double(double %x) {
|
||||
; CHECK-LABEL: @fabs_double(
|
||||
; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
|
||||
; CHECK-NEXT: %and = bitcast double %fabs to i64
|
||||
; CHECK-NEXT: ret i64 %and
|
||||
%bc = bitcast double %x to i64
|
||||
%and = and i64 %bc, 9223372036854775807
|
||||
ret i64 %and
|
||||
}
|
||||
|
||||
define i64 @fabs_double_swap(double %x) {
|
||||
; CHECK-LABEL: @fabs_double_swap(
|
||||
; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
|
||||
; CHECK-NEXT: %and = bitcast double %fabs to i64
|
||||
; CHECK-NEXT: ret i64 %and
|
||||
%bc = bitcast double %x to i64
|
||||
%and = and i64 9223372036854775807, %bc
|
||||
ret i64 %and
|
||||
}
|
||||
|
||||
define i32 @fabs_float(float %x) {
|
||||
; CHECK-LABEL: @fabs_float(
|
||||
; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
|
||||
; CHECK-NEXT: %and = bitcast float %fabs to i32
|
||||
; CHECK-NEXT: ret i32 %and
|
||||
%bc = bitcast float %x to i32
|
||||
%and = and i32 %bc, 2147483647
|
||||
ret i32 %and
|
||||
}
|
||||
|
||||
; Make sure that only a bitcast is transformed.
|
||||
|
||||
define i64 @fabs_double_not_bitcast(double %x) {
|
||||
; CHECK-LABEL: @fabs_double_not_bitcast(
|
||||
; CHECK-NEXT: %bc = fptoui double %x to i64
|
||||
; CHECK-NEXT: %and = and i64 %bc, 9223372036854775807
|
||||
; CHECK-NEXT: ret i64 %and
|
||||
%bc = fptoui double %x to i64
|
||||
%and = and i64 %bc, 9223372036854775807
|
||||
ret i64 %and
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,6 @@ define fp128 @square_fabs_call_f128(fp128 %x) {
|
|||
declare float @llvm.fabs.f32(float)
|
||||
declare double @llvm.fabs.f64(double)
|
||||
declare fp128 @llvm.fabs.f128(fp128)
|
||||
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
|
||||
|
||||
define float @square_fabs_intrinsic_f32(float %x) {
|
||||
%mul = fmul float %x, %x
|
||||
|
@ -99,27 +98,3 @@ define float @square_fabs_shrink_call2(float %x) {
|
|||
; CHECK-NEXT: ret float %sq
|
||||
}
|
||||
|
||||
; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero.
|
||||
|
||||
define i32 @fabs_value_tracking_f32(float %x) {
|
||||
%call = call float @llvm.fabs.f32(float %x)
|
||||
%bc = bitcast float %call to i32
|
||||
%and = and i32 %bc, 2147483648
|
||||
ret i32 %and
|
||||
|
||||
; CHECK-LABEL: fabs_value_tracking_f32(
|
||||
; CHECK: ret i32 0
|
||||
}
|
||||
|
||||
; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero.
|
||||
|
||||
define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) {
|
||||
%call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
|
||||
%bc = bitcast <4 x float> %call to <4 x i32>
|
||||
%and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
|
||||
ret <4 x i32> %and
|
||||
|
||||
; CHECK-LABEL: fabs_value_tracking_v4f32(
|
||||
; CHECK: ret <4 x i32> %and
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue