forked from OSchip/llvm-project
Reapply "[ARM] Combine CMOV into BFI where possible"
Added fixes for stage2 failures: CMOV is not commutable; commuting the operands results in the condition being flipped! d'oh! Original commit message: If we have a CMOV, OR and AND combination such as: if (x & CN) y |= CM; And: * CN is a single bit; * All bits covered by CM are known zero in y; Then we can convert this to a sequence of BFI instructions. This will always be a win if CM is a single bit, will always be no worse than the TST & OR sequence if CM is two bits, and for thumb will be no worse if CM is three bits (due to the extra IT instruction). llvm-svn: 252606
This commit is contained in:
parent
c78e89962a
commit
9d55f19cfa
|
@ -10241,6 +10241,111 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
|
||||
APInt &KnownOne) {
|
||||
if (Op.getOpcode() == ARMISD::BFI) {
|
||||
// Conservatively, we can recurse down the first operand
|
||||
// and just mask out all affected bits.
|
||||
computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
|
||||
|
||||
// The operand to BFI is already a mask suitable for removing the bits it
|
||||
// sets.
|
||||
ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
|
||||
APInt Mask = CI->getAPIntValue();
|
||||
KnownZero &= Mask;
|
||||
KnownOne &= Mask;
|
||||
return;
|
||||
}
|
||||
if (Op.getOpcode() == ARMISD::CMOV) {
|
||||
APInt KZ2(KnownZero.getBitWidth(), 0);
|
||||
APInt KO2(KnownOne.getBitWidth(), 0);
|
||||
computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
|
||||
computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
|
||||
|
||||
KnownZero &= KZ2;
|
||||
KnownOne &= KO2;
|
||||
return;
|
||||
}
|
||||
return DAG.computeKnownBits(Op, KnownZero, KnownOne);
|
||||
}
|
||||
|
||||
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
|
||||
// If we have a CMOV, OR and AND combination such as:
|
||||
// if (x & CN)
|
||||
// y |= CM;
|
||||
//
|
||||
// And:
|
||||
// * CN is a single bit;
|
||||
// * All bits covered by CM are known zero in y
|
||||
//
|
||||
// Then we can convert this into a sequence of BFI instructions. This will
|
||||
// always be a win if CM is a single bit, will always be no worse than the
|
||||
// TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
|
||||
// three bits (due to the extra IT instruction).
|
||||
|
||||
SDValue Op0 = CMOV->getOperand(0);
|
||||
SDValue Op1 = CMOV->getOperand(1);
|
||||
SDValue CmpZ = CMOV->getOperand(4);
|
||||
|
||||
assert(CmpZ->getOpcode() == ARMISD::CMPZ);
|
||||
SDValue And = CmpZ->getOperand(0);
|
||||
if (And->getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
|
||||
if (!AndC || !AndC->getAPIntValue().isPowerOf2())
|
||||
return SDValue();
|
||||
SDValue X = And->getOperand(0);
|
||||
|
||||
if (Op1->getOpcode() != ISD::OR)
|
||||
return SDValue();
|
||||
|
||||
ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
|
||||
if (!OrC)
|
||||
return SDValue();
|
||||
SDValue Y = Op1->getOperand(0);
|
||||
|
||||
if (Op0 != Y)
|
||||
return SDValue();
|
||||
|
||||
// Now, is it profitable to continue?
|
||||
APInt OrCI = OrC->getAPIntValue();
|
||||
unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
|
||||
if (OrCI.countPopulation() > Heuristic)
|
||||
return SDValue();
|
||||
|
||||
// Lastly, can we determine that the bits defined by OrCI
|
||||
// are zero in Y?
|
||||
APInt KnownZero, KnownOne;
|
||||
computeKnownBits(DAG, Y, KnownZero, KnownOne);
|
||||
if ((OrCI & KnownZero) != OrCI)
|
||||
return SDValue();
|
||||
|
||||
// OK, we can do the combine.
|
||||
SDValue V = Y;
|
||||
SDLoc dl(X);
|
||||
EVT VT = X.getValueType();
|
||||
unsigned BitInX = AndC->getAPIntValue().logBase2();
|
||||
|
||||
if (BitInX != 0) {
|
||||
// We must shift X first.
|
||||
X = DAG.getNode(ISD::SRL, dl, VT, X,
|
||||
DAG.getConstant(BitInX, dl, VT));
|
||||
}
|
||||
|
||||
for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
|
||||
BitInY < NumActiveBits; ++BitInY) {
|
||||
if (OrCI[BitInY] == 0)
|
||||
continue;
|
||||
APInt Mask(VT.getSizeInBits(), 0);
|
||||
Mask.setBit(BitInY);
|
||||
V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
|
||||
// Confusingly, the operand is an *inverted* mask.
|
||||
DAG.getConstant(~Mask, dl, VT));
|
||||
}
|
||||
|
||||
return V;
|
||||
}
|
||||
|
||||
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
|
||||
SDValue
|
||||
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
|
||||
|
@ -10259,6 +10364,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
|
|||
ARMCC::CondCodes CC =
|
||||
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
|
||||
|
||||
// BFI is only available on V6T2+.
|
||||
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
|
||||
SDValue R = PerformCMOVToBFICombine(N, DAG);
|
||||
if (R)
|
||||
return R;
|
||||
}
|
||||
|
||||
// Simplify
|
||||
// mov r1, r0
|
||||
// cmp r1, x
|
||||
|
|
|
@ -260,6 +260,7 @@ namespace llvm {
|
|||
SDNode *Node) const override;
|
||||
|
||||
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
|
||||
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
||||
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
|
||||
|
|
|
@ -74,3 +74,37 @@ entry:
|
|||
%or = or i32 %shl, %and
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
define i32 @f7(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: bfi r1, r0, #4, #1
|
||||
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
|
||||
%and = and i32 %x, 4
|
||||
%or = or i32 %y2, 16
|
||||
%cmp = icmp ne i32 %and, 0
|
||||
%sel = select i1 %cmp, i32 %or, i32 %y2
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
define i32 @f8(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: bfi r1, r0, #4, #1
|
||||
; CHECK: bfi r1, r0, #5, #1
|
||||
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
|
||||
%and = and i32 %x, 4
|
||||
%or = or i32 %y2, 48
|
||||
%cmp = icmp ne i32 %and, 0
|
||||
%sel = select i1 %cmp, i32 %or, i32 %y2
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
define i32 @f9(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK-NOT: bfi
|
||||
%y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
|
||||
%and = and i32 %x, 4
|
||||
%or = or i32 %y2, 48
|
||||
%cmp = icmp ne i32 %and, 0
|
||||
%sel = select i1 %cmp, i32 %y2, i32 %or
|
||||
ret i32 %sel
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue