[x86, BMI] add TLI hook for 'andn' and use it to simplify comparisons

For the sake of minimalism, this patch is x86 only, but I think that at least PPC, ARM, AArch64, and Sparc probably want to do this too. We might want to generalize the hook and pattern recognition for a target like PPC that has a full assortment of negated logic ops (orc, nand). Note that http://reviews.llvm.org/D18842 will cause this transform to trigger more often. For reference, this relates to: https://llvm.org/bugs/show_bug.cgi?id=27105 https://llvm.org/bugs/show_bug.cgi?id=27202 https://llvm.org/bugs/show_bug.cgi?id=27203 https://llvm.org/bugs/show_bug.cgi?id=27328 Differential Revision: http://reviews.llvm.org/D19087 llvm-svn: 268858
2016-05-07 15:03:40 +00:00 · 2016-05-07 15:03:40 +00:00 · c2751e7050
parent 65f088f528
commit c2751e7050
5 changed files with 143 additions and 11 deletions
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@ -334,6 +334,22 @@ public:
    return MaskAndBranchFoldingIsLegal;
  }

+  /// Return true if the target should transform:
+  /// (X & Y) == Y ---> (~X & Y) == 0
+  /// (X & Y) != Y ---> (~X & Y) != 0
+  ///
+  /// This may be profitable if the target has a bitwise and-not operation that
+  /// sets comparison flags. A target may want to limit the transformation based
+  /// on the type of Y or if Y is a constant.
+  ///
+  /// Note that the transform will not occur if Y is known to be a power-of-2
+  /// because a mask and compare of a single bit can be handled by inverting the
+  /// predicate, for example:
+  /// (X & 8) == 8 ---> (X & 8) != 0
+  virtual bool hasAndNotCompare(SDValue Y) const {
+    return false;
+  }
+
  /// \brief Return true if the target wants to use the optimization that
  /// turns ext(promotableInst1(...(promotableInstN(load)))) into
  /// promotedInst1(...(promotedInstN(ext(load)))).
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -1304,6 +1304,52 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
  llvm_unreachable("Unexpected enumeration.");
 }

+/// If the target supports an 'and-not' or 'and-complement' logic operation,
+/// try to use that to make a comparison operation more efficient.
+static SDValue createAndNotSetCC(EVT VT, SDValue N0, SDValue N1,
+                                 ISD::CondCode Cond, SelectionDAG &DAG,
+                                 SDLoc dl) {
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
+  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+    std::swap(N0, N1);
+
+  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+    return SDValue();
+
+  SDValue X, Y;
+  if (N0.getOperand(0) == N1) {
+    X = N0.getOperand(1);
+    Y = N0.getOperand(0);
+  } else if (N0.getOperand(1) == N1) {
+    X = N0.getOperand(0);
+    Y = N0.getOperand(1);
+  } else {
+    return SDValue();
+  }
+
+  // Bail out if the compare operand that we want to turn into a zero is already
+  // a zero (otherwise, infinite loop).
+  auto *YConst = dyn_cast<ConstantSDNode>(Y);
+  if (YConst && YConst->isNullValue())
+    return SDValue();
+
+  // We don't want to do this transform if the mask is a single bit because
+  // there are more efficient ways to deal with that case (for example, 'bt' on
+  // x86 or 'rlwinm' on PPC).
+  if (!DAG.getTargetLoweringInfo().hasAndNotCompare(Y) ||
+      valueHasExactlyOneBitSet(Y, DAG))
+    return SDValue();
+
+  // Transform this into: ~X & Y == 0.
+  EVT OpVT = X.getValueType();
+  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+  return DAG.getSetCC(dl, VT, NewAnd, DAG.getConstant(0, dl, OpVT), Cond);
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue
@ -2166,6 +2212,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
    return N0;
  }

+  if (SDValue AndNotCC = createAndNotSetCC(VT, N0, N1, Cond, DAG, dl))
+    return AndNotCC;
+
  // Could not fold it.
  return SDValue();
 }
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -4122,6 +4122,18 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasLZCNT();
 }

+bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
+  if (!Subtarget.hasBMI())
+    return false;
+
+  // There are only 32-bit and 64-bit forms for 'andn'.
+  EVT VT = Y.getValueType();
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size is undef.
 static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@ -751,6 +751,8 @@ namespace llvm {

    bool isCheapToSpeculateCtlz() const override;

+    bool hasAndNotCompare(SDValue Y) const override;
+
    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@ -135,12 +135,11 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
  ret i1 %cmp
 }

-; TODO: Recognize a disguised andn in the following 4 tests.
+; Recognize a disguised andn in the following 4 tests.
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
  %and = and i32 %x, %y
@ -151,8 +150,7 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
 define i1 @and_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
  %and = and i32 %y, %x
@ -163,8 +161,7 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
 define i1 @and_cmp3(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
  %and = and i32 %x, %y
@ -175,8 +172,7 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
 define i1 @and_cmp4(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
  %and = and i32 %y, %x
@ -189,8 +185,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl $43, %edi
-; CHECK-NEXT:    cmpl $43, %edi
+; CHECK-NEXT:    movl $43, %eax
+; CHECK-NEXT:    andnl %eax, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
  %and = and i32 %x, 43
@ -198,6 +194,63 @@ define i1 @and_cmp_const(i32 %x) {
  ret i1 %cmp
 }

+; But don't use 'andn' if the mask is a power-of-two.
+define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_const_power_of_two:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    btl %esi, %edi
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+;
+  %shl = shl i32 1, %y
+  %and = and i32 %x, %shl
+  %cmp = icmp ne i32 %and, %shl
+  ret i1 %cmp
+}
+
+; Don't transform to 'andn' if there's another use of the 'and'.
+define i32 @and_cmp_not_one_use(i32 %x) {
+; CHECK-LABEL: and_cmp_not_one_use:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $37, %edi
+; CHECK-NEXT:    cmpl $37, %edi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    retq
+;
+  %and = and i32 %x, 37
+  %cmp = icmp eq i32 %and, 37
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %and, %ext
+  ret i32 %add
+}
+
+; Verify that we're not transforming invalid comparison predicates.
+define i1 @not_an_andn1(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    retq
+  %and = and i32 %x, %y
+  %cmp = icmp sgt i32 %y, %and
+  ret i1 %cmp
+}
+
+define i1 @not_an_andn2(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    retq
+  %and = and i32 %y, %x
+  %cmp = icmp ule i32 %y, %and
+  ret i1 %cmp
+}
+
 ; Don't choose a 'test' if an 'andn' can be used.
 define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
 ; CHECK-LABEL: andn_cmp_swap_ops: