[x86, BMI] add TLI hook for 'andn' and use it to simplify comparisons

For the sake of minimalism, this patch is x86 only, but I think that at least
PPC, ARM, AArch64, and Sparc probably want to do this too.

We might want to generalize the hook and pattern recognition for a target like
PPC that has a full assortment of negated logic ops (orc, nand).

Note that http://reviews.llvm.org/D18842 will cause this transform to trigger
more often.
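
To make the expected win concrete: the transform rests on the identity that (x & y) == y holds exactly when no bit of y survives in ~x. A minimal, self-contained C++ sketch of that identity (illustrative names, not from the patch):

  #include <cassert>
  #include <cstdint>

  // Both forms ask "is y a bit-subset of x?". The second shape maps to a
  // single flag-setting 'andn' on BMI targets, with no separate compare.
  bool maskIsSubset(uint32_t x, uint32_t y) { return (x & y) == y; }
  bool maskIsSubsetAndn(uint32_t x, uint32_t y) { return (~x & y) == 0; }

  int main() {
    for (uint32_t x : {0u, 0xFFu, 0xA5A5A5A5u})
      for (uint32_t y : {0u, 0x0Fu, 0xA5000000u, 0xFFFFFFFFu})
        assert(maskIsSubset(x, y) == maskIsSubsetAndn(x, y));
    return 0;
  }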

For reference, this relates to:
https://llvm.org/bugs/show_bug.cgi?id=27105
https://llvm.org/bugs/show_bug.cgi?id=27202
https://llvm.org/bugs/show_bug.cgi?id=27203
https://llvm.org/bugs/show_bug.cgi?id=27328

Differential Revision: http://reviews.llvm.org/D19087

llvm-svn: 268858
commit c2751e7050 (parent 65f088f528)
Author: Sanjay Patel
Date: 2016-05-07 15:03:40 +00:00
5 changed files with 143 additions and 11 deletions

include/llvm/Target/TargetLowering.h

@@ -334,6 +334,22 @@ public:
     return MaskAndBranchFoldingIsLegal;
   }
 
+  /// Return true if the target should transform:
+  /// (X & Y) == Y ---> (~X & Y) == 0
+  /// (X & Y) != Y ---> (~X & Y) != 0
+  ///
+  /// This may be profitable if the target has a bitwise and-not operation that
+  /// sets comparison flags. A target may want to limit the transformation based
+  /// on the type of Y or if Y is a constant.
+  ///
+  /// Note that the transform will not occur if Y is known to be a power-of-2
+  /// because a mask and compare of a single bit can be handled by inverting the
+  /// predicate, for example:
+  /// (X & 8) == 8 ---> (X & 8) != 0
+  virtual bool hasAndNotCompare(SDValue Y) const {
+    return false;
+  }
+
   /// \brief Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
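
For comparison with the commit message's note about other targets, a hypothetical override for a backend with a flag-setting and-not might look like the sketch below. This is an assumption-laden illustration, not part of this patch; it presumes AArch64's BICS (Rn & ~Rm, setting NZCV) and the usual 32/64-bit GPR widths:

  // Hypothetical sketch only, not in this commit:
  bool AArch64TargetLowering::hasAndNotCompare(SDValue Y) const {
    // BICS exists for 32- and 64-bit general-purpose registers.
    EVT VT = Y.getValueType();
    return VT == MVT::i32 || VT == MVT::i64;
  }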

lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -1304,6 +1304,52 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
   llvm_unreachable("Unexpected enumeration.");
 }
 
+/// If the target supports an 'and-not' or 'and-complement' logic operation,
+/// try to use that to make a comparison operation more efficient.
+static SDValue createAndNotSetCC(EVT VT, SDValue N0, SDValue N1,
+                                 ISD::CondCode Cond, SelectionDAG &DAG,
+                                 SDLoc dl) {
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
+  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+    std::swap(N0, N1);
+
+  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+    return SDValue();
+
+  SDValue X, Y;
+  if (N0.getOperand(0) == N1) {
+    X = N0.getOperand(1);
+    Y = N0.getOperand(0);
+  } else if (N0.getOperand(1) == N1) {
+    X = N0.getOperand(0);
+    Y = N0.getOperand(1);
+  } else {
+    return SDValue();
+  }
+
+  // Bail out if the compare operand that we want to turn into a zero is already
+  // a zero (otherwise, infinite loop).
+  auto *YConst = dyn_cast<ConstantSDNode>(Y);
+  if (YConst && YConst->isNullValue())
+    return SDValue();
+
+  // We don't want to do this transform if the mask is a single bit because
+  // there are more efficient ways to deal with that case (for example, 'bt' on
+  // x86 or 'rlwinm' on PPC).
+  if (!DAG.getTargetLoweringInfo().hasAndNotCompare(Y) ||
+      valueHasExactlyOneBitSet(Y, DAG))
+    return SDValue();
+
+  // Transform this into: ~X & Y == 0.
+  EVT OpVT = X.getValueType();
+  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+  return DAG.getSetCC(dl, VT, NewAnd, DAG.getConstant(0, dl, OpVT), Cond);
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue
@@ -2166,6 +2212,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       return N0;
     }
 
+    if (SDValue AndNotCC = createAndNotSetCC(VT, N0, N1, Cond, DAG, dl))
+      return AndNotCC;
+
     // Could not fold it.
     return SDValue();
   }
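
Note that SelectionDAG has no dedicated 'not' node: DAG.getNOT builds an XOR with an all-ones constant. So in DAG terms the rewrite above is (sketched notation; the seteq case shown, setne is analogous):

  // Before: (setcc (and X, Y), Y, seteq)
  // After:  (setcc (and (xor X, AllOnes), Y), 0, seteq)

The backend is then expected to match the (and (xor X, -1), Y) shape to its and-not instruction during instruction selection.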

lib/Target/X86/X86ISelLowering.cpp

@@ -4122,6 +4122,18 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget.hasLZCNT();
 }
 
+bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
+  if (!Subtarget.hasBMI())
+    return false;
+
+  // There are only 32-bit and 64-bit forms for 'andn'.
+  EVT VT = Y.getValueType();
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size is undef.
 static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
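
For background, a hedged summary of why this hook suffices on x86: BMI's 'andn' computes ~src1 & src2 in one instruction and sets ZF (and SF) from the result, so a following sete/setne needs no separate 'cmpl'. A plain C++ model of the value and zero-flag behavior (illustrative, not from the patch):

  #include <cstdint>

  // Models 32-bit ANDN; the 64-bit form is analogous. There are no 8/16-bit
  // forms, which is why the hook above rejects other types.
  struct AndnResult { uint32_t Value; bool ZF; };
  inline AndnResult andn32(uint32_t Src1, uint32_t Src2) {
    uint32_t V = ~Src1 & Src2;
    return {V, V == 0};
  }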

lib/Target/X86/X86ISelLowering.h

@@ -751,6 +751,8 @@ namespace llvm {
     bool isCheapToSpeculateCtlz() const override;
 
+    bool hasAndNotCompare(SDValue Y) const override;
+
     /// Return the value type to use for ISD::SETCC.
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;

test/CodeGen/X86/bmi.ll

@@ -135,12 +135,11 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; TODO: Recognize a disguised andn in the following 4 tests.
+; Recognize a disguised andn in the following 4 tests.
 
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andl %esi, %edi
-; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: andnl %esi, %edi, %eax
 ; CHECK-NEXT: sete %al
 ; CHECK-NEXT: retq
   %and = and i32 %x, %y
@@ -151,8 +150,7 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
 define i1 @and_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp2:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andl %esi, %edi
-; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: andnl %esi, %edi, %eax
 ; CHECK-NEXT: setne %al
 ; CHECK-NEXT: retq
   %and = and i32 %y, %x
@@ -163,8 +161,7 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
 define i1 @and_cmp3(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp3:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andl %esi, %edi
-; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: andnl %esi, %edi, %eax
 ; CHECK-NEXT: sete %al
 ; CHECK-NEXT: retq
   %and = and i32 %x, %y
@@ -175,8 +172,7 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
 define i1 @and_cmp4(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp4:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andl %esi, %edi
-; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: andnl %esi, %edi, %eax
 ; CHECK-NEXT: setne %al
 ; CHECK-NEXT: retq
   %and = and i32 %y, %x
@@ -189,8 +185,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andl $43, %edi
-; CHECK-NEXT: cmpl $43, %edi
+; CHECK-NEXT: movl $43, %eax
+; CHECK-NEXT: andnl %eax, %edi, %eax
 ; CHECK-NEXT: sete %al
 ; CHECK-NEXT: retq
   %and = and i32 %x, 43
@@ -198,6 +194,63 @@ define i1 @and_cmp_const(i32 %x) {
   ret i1 %cmp
 }
 
+; But don't use 'andn' if the mask is a power-of-two.
+define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_const_power_of_two:
+; CHECK: # BB#0:
+; CHECK-NEXT: btl %esi, %edi
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retq
+;
+  %shl = shl i32 1, %y
+  %and = and i32 %x, %shl
+  %cmp = icmp ne i32 %and, %shl
+  ret i1 %cmp
+}
+
+; Don't transform to 'andn' if there's another use of the 'and'.
+define i32 @and_cmp_not_one_use(i32 %x) {
+; CHECK-LABEL: and_cmp_not_one_use:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $37, %edi
+; CHECK-NEXT: cmpl $37, %edi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: retq
+;
+  %and = and i32 %x, 37
+  %cmp = icmp eq i32 %and, 37
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %and, %ext
+  ret i32 %add
+}
+
+; Verify that we're not transforming invalid comparison predicates.
+define i1 @not_an_andn1(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn1:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: retq
+  %and = and i32 %x, %y
+  %cmp = icmp sgt i32 %y, %and
+  ret i1 %cmp
+}
+
+define i1 @not_an_andn2(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn2:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: retq
+  %and = and i32 %y, %x
+  %cmp = icmp ule i32 %y, %and
+  ret i1 %cmp
+}
+
+; Don't choose a 'test' if an 'andn' can be used.
+define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
+; CHECK-LABEL: andn_cmp_swap_ops: