forked from OSchip/llvm-project
[x86, BMI] add TLI hook for 'andn' and use it to simplify comparisons
For the sake of minimalism, this patch is x86 only, but I think that at least PPC, ARM, AArch64, and Sparc probably want to do this too.

We might want to generalize the hook and pattern recognition for a target like PPC that has a full assortment of negated logic ops (orc, nand).

Note that http://reviews.llvm.org/D18842 will cause this transform to trigger more often.

For reference, this relates to:
https://llvm.org/bugs/show_bug.cgi?id=27105
https://llvm.org/bugs/show_bug.cgi?id=27202
https://llvm.org/bugs/show_bug.cgi?id=27203
https://llvm.org/bugs/show_bug.cgi?id=27328

Differential Revision: http://reviews.llvm.org/D19087

llvm-svn: 268858
This commit is contained in:
parent
65f088f528
commit
c2751e7050
|
@ -334,6 +334,22 @@ public:
|
|||
return MaskAndBranchFoldingIsLegal;
|
||||
}
|
||||
|
||||
/// Return true if the target should transform:
|
||||
/// (X & Y) == Y ---> (~X & Y) == 0
|
||||
/// (X & Y) != Y ---> (~X & Y) != 0
|
||||
///
|
||||
/// This may be profitable if the target has a bitwise and-not operation that
|
||||
/// sets comparison flags. A target may want to limit the transformation based
|
||||
/// on the type of Y or if Y is a constant.
|
||||
///
|
||||
/// Note that the transform will not occur if Y is known to be a power-of-2
|
||||
/// because a mask and compare of a single bit can be handled by inverting the
|
||||
/// predicate, for example:
|
||||
/// (X & 8) == 8 ---> (X & 8) != 0
|
||||
virtual bool hasAndNotCompare(SDValue Y) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Return true if the target wants to use the optimization that
|
||||
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
|
||||
/// promotedInst1(...(promotedInstN(ext(load)))).
|
||||
|
|
|
@ -1304,6 +1304,52 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
|
|||
llvm_unreachable("Unexpected enumeration.");
|
||||
}
|
||||
|
||||
/// If the target supports an 'and-not' or 'and-complement' logic operation,
|
||||
/// try to use that to make a comparison operation more efficient.
|
||||
static SDValue createAndNotSetCC(EVT VT, SDValue N0, SDValue N1,
|
||||
ISD::CondCode Cond, SelectionDAG &DAG,
|
||||
SDLoc dl) {
|
||||
// Match these patterns in any of their permutations:
|
||||
// (X & Y) == Y
|
||||
// (X & Y) != Y
|
||||
if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
|
||||
std::swap(N0, N1);
|
||||
|
||||
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
|
||||
(Cond != ISD::SETEQ && Cond != ISD::SETNE))
|
||||
return SDValue();
|
||||
|
||||
SDValue X, Y;
|
||||
if (N0.getOperand(0) == N1) {
|
||||
X = N0.getOperand(1);
|
||||
Y = N0.getOperand(0);
|
||||
} else if (N0.getOperand(1) == N1) {
|
||||
X = N0.getOperand(0);
|
||||
Y = N0.getOperand(1);
|
||||
} else {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Bail out if the compare operand that we want to turn into a zero is already
|
||||
// a zero (otherwise, infinite loop).
|
||||
auto *YConst = dyn_cast<ConstantSDNode>(Y);
|
||||
if (YConst && YConst->isNullValue())
|
||||
return SDValue();
|
||||
|
||||
// We don't want to do this transform if the mask is a single bit because
|
||||
// there are more efficient ways to deal with that case (for example, 'bt' on
|
||||
// x86 or 'rlwinm' on PPC).
|
||||
if (!DAG.getTargetLoweringInfo().hasAndNotCompare(Y) ||
|
||||
valueHasExactlyOneBitSet(Y, DAG))
|
||||
return SDValue();
|
||||
|
||||
// Transform this into: ~X & Y == 0.
|
||||
EVT OpVT = X.getValueType();
|
||||
SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
|
||||
SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
|
||||
return DAG.getSetCC(dl, VT, NewAnd, DAG.getConstant(0, dl, OpVT), Cond);
|
||||
}
|
||||
|
||||
/// Try to simplify a setcc built with the specified operands and cc. If it is
|
||||
/// unable to simplify it, return a null SDValue.
|
||||
SDValue
|
||||
|
@ -2166,6 +2212,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
|||
return N0;
|
||||
}
|
||||
|
||||
if (SDValue AndNotCC = createAndNotSetCC(VT, N0, N1, Cond, DAG, dl))
|
||||
return AndNotCC;
|
||||
|
||||
// Could not fold it.
|
||||
return SDValue();
|
||||
}
|
||||
|
|
|
@ -4122,6 +4122,18 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
|
|||
return Subtarget.hasLZCNT();
|
||||
}
|
||||
|
||||
/// Report whether an and-not compare should be formed for mask operand Y.
/// This requires the BMI subtarget feature and a 32-bit or 64-bit integer Y.
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
  // Without BMI the transform is never requested.
  if (!Subtarget.hasBMI())
    return false;

  // There are only 32-bit and 64-bit forms for 'andn'.
  const EVT MaskVT = Y.getValueType();
  return MaskVT == MVT::i32 || MaskVT == MVT::i64;
}
|
||||
|
||||
/// Return true if every element in Mask, beginning
|
||||
/// from position Pos and ending in Pos+Size is undef.
|
||||
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
|
||||
|
|
|
@ -751,6 +751,8 @@ namespace llvm {
|
|||
|
||||
bool isCheapToSpeculateCtlz() const override;
|
||||
|
||||
bool hasAndNotCompare(SDValue Y) const override;
|
||||
|
||||
/// Return the value type to use for ISD::SETCC.
|
||||
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
|
||||
EVT VT) const override;
|
||||
|
|
|
@ -135,12 +135,11 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
|
|||
ret i1 %cmp
|
||||
}
|
||||
|
||||
; TODO: Recognize a disguised andn in the following 4 tests.
|
||||
; Recognize a disguised andn in the following 4 tests.
|
||||
define i1 @and_cmp1(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: and_cmp1:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpl %esi, %edi
|
||||
; CHECK-NEXT: andnl %esi, %edi, %eax
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %x, %y
|
||||
|
@ -151,8 +150,7 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
|
|||
define i1 @and_cmp2(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: and_cmp2:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpl %esi, %edi
|
||||
; CHECK-NEXT: andnl %esi, %edi, %eax
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %y, %x
|
||||
|
@ -163,8 +161,7 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
|
|||
define i1 @and_cmp3(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: and_cmp3:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpl %edi, %esi
|
||||
; CHECK-NEXT: andnl %esi, %edi, %eax
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %x, %y
|
||||
|
@ -175,8 +172,7 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
|
|||
define i1 @and_cmp4(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: and_cmp4:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpl %edi, %esi
|
||||
; CHECK-NEXT: andnl %esi, %edi, %eax
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %y, %x
|
||||
|
@ -189,8 +185,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
|
|||
define i1 @and_cmp_const(i32 %x) {
|
||||
; CHECK-LABEL: and_cmp_const:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl $43, %edi
|
||||
; CHECK-NEXT: cmpl $43, %edi
|
||||
; CHECK-NEXT: movl $43, %eax
|
||||
; CHECK-NEXT: andnl %eax, %edi, %eax
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %x, 43
|
||||
|
@ -198,6 +194,63 @@ define i1 @and_cmp_const(i32 %x) {
|
|||
ret i1 %cmp
|
||||
}
|
||||
|
||||
; But don't use 'andn' if the mask is a power-of-two.
; The mask here is (1 << %y), which has exactly one bit set, so the expected
; code is a bit test ('bt') followed by a set-on-flag instead of an 'andn'.
|
||||
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: and_cmp_const_power_of_two:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: btl %esi, %edi
|
||||
; CHECK-NEXT: setae %al
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; Build the single-bit mask, then compare the masked value against the mask.
%shl = shl i32 1, %y
|
||||
%and = and i32 %x, %shl
|
||||
%cmp = icmp ne i32 %and, %shl
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
; Don't transform to 'andn' if there's another use of the 'and'.
; Here %and feeds both the compare and the final add, so the expected code
; keeps the plain 'and' + 'cmp' sequence.
|
||||
define i32 @and_cmp_not_one_use(i32 %x) {
|
||||
; CHECK-LABEL: and_cmp_not_one_use:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl $37, %edi
|
||||
; CHECK-NEXT: cmpl $37, %edi
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: movzbl %al, %eax
|
||||
; CHECK-NEXT: addl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
%and = and i32 %x, 37
|
||||
|
||||
%cmp = icmp eq i32 %and, 37
|
||||
%ext = zext i1 %cmp to i32
|
||||
; Second use of %and: the masked value is added to the zero-extended compare.
|
||||
%add = add i32 %and, %ext
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Verify that we're not transforming invalid comparison predicates.
; This test uses a signed greater-than compare, so the 'and' + 'cmp' sequence
; is expected to be left as is.
|
||||
define i1 @not_an_andn1(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: not_an_andn1:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpl %edi, %esi
|
||||
; CHECK-NEXT: setg %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %x, %y
|
||||
|
||||
; 'sgt' is neither 'eq' nor 'ne'.
%cmp = icmp sgt i32 %y, %and
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
; Same shape as a masked compare against the mask, but with an unsigned
; less-or-equal predicate; the expected code keeps 'and' + 'cmp'.
define i1 @not_an_andn2(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: not_an_andn2:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: cmpl %edi, %esi
|
||||
; CHECK-NEXT: setbe %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %y, %x
|
||||
|
||||
; 'ule' is neither 'eq' nor 'ne'.
%cmp = icmp ule i32 %y, %and
|
||||
ret i1 %cmp
|
||||
}
|
||||
|
||||
; Don't choose a 'test' if an 'andn' can be used.
|
||||
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
|
||||
; CHECK-LABEL: andn_cmp_swap_ops:
|
||||
|
|
Loading…
Reference in New Issue