From fa8bad8a0f85dbf5abf0cc8f404fec13d5b1e0fa Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 23 Sep 2017 14:32:07 +0000
Subject: [PATCH] [x86] reduce 64-bit mask constant to 32-bits by right
 shifting

This is a follow-up from D38181 (r314023). We have to put 64-bit
constants into a register using a separate instruction, so we
should try harder to avoid that.

From what I can see, we're unlikely to encounter this pattern in the
DAG today, because the setcc combines that run upstream of this fold
don't (usually?) produce it. If we fix that, then this fold will become
more relevant. Since the cost of handling this case is just loosening
the predicate of the existing fold, we might as well do it now.

llvm-svn: 314064
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++------------
 llvm/test/CodeGen/X86/shift-and.ll      |  6 +++---
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8c39ae47f31c..a5e65f839360 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31688,21 +31688,22 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) {
   if (!ShiftC || !AndC)
     return SDValue();
 
-  // If the 'and' mask is already smaller than a byte, then don't bother.
-  // If the new 'and' mask would be bigger than a byte, then don't bother.
-  // If the mask fits in a byte, then we know we can generate smaller and
-  // potentially better code by shifting first.
-  // TODO: Always try to shrink a mask that is over 32-bits?
+  // If we can shrink the constant mask below 8-bits or 32-bits, then this
+  // transform should reduce code size. It may also enable secondary transforms
+  // from improved known-bits analysis or instruction selection.
   APInt MaskVal = AndC->getAPIntValue();
   APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
-  if (MaskVal.getMinSignedBits() <= 8 || NewMaskVal.getMinSignedBits() > 8)
-    return SDValue();
-
-  // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
-  SDLoc DL(N);
-  SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
-  SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
-  return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
+  unsigned OldMaskSize = MaskVal.getMinSignedBits();
+  unsigned NewMaskSize = NewMaskVal.getMinSignedBits();
+  if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
+      (OldMaskSize > 32 && NewMaskSize <= 32)) {
+    // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
+    SDLoc DL(N);
+    SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
+    SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
+  }
+  return SDValue();
 }
 
 /// \brief Returns a vector of 0s if the node in input is a vector logical
diff --git a/llvm/test/CodeGen/X86/shift-and.ll b/llvm/test/CodeGen/X86/shift-and.ll
index 69ec2f8fae76..f1f508c225d0 100644
--- a/llvm/test/CodeGen/X86/shift-and.ll
+++ b/llvm/test/CodeGen/X86/shift-and.ll
@@ -205,9 +205,9 @@ define i64 @big_mask_constant(i64 %x) nounwind {
 ;
 ; X64-LABEL: big_mask_constant:
 ; X64:       # BB#0:
-; X64-NEXT:    movabsq $17179869184, %rax # imm = 0x400000000
-; X64-NEXT:    andq %rdi, %rax
-; X64-NEXT:    shrq $7, %rax
+; X64-NEXT:    shrq $7, %rdi
+; X64-NEXT:    andl $134217728, %edi # imm = 0x8000000
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %and = and i64 %x, 17179869184 ; 0x400000000
   %sh = lshr i64 %and, 7