From a4092104cb21ba98edf8ed48ce5640bb0725c406 Mon Sep 17 00:00:00 2001
From: Elliot Colp <colpell@ca.ibm.com>
Date: Tue, 23 Aug 2016 14:03:02 +0000
Subject: [PATCH] Fix SystemZ hang caused by r279105

The change in r279105 causes an infinite loop in some cases, as it sets the upper bits of an AND mask constant, which DAGCombiner::SimplifyDemandedBits then unsets.
This patch reverts that part of the behaviour, instead relying on .td peepholes to perform the transformation to NILL. I reapplied my original fix for the problem addressed by r279105 (unsetting the upper bits, which prevents a compiler abort for a different reason).

Differential Revision: https://reviews.llvm.org/D23781

llvm-svn: 279515
---
 .../Target/SystemZ/SystemZISelLowering.cpp    | 47 +++++++++----------
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   | 31 ++++++++++++
 2 files changed, 52 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 8d8dbfdf7b67..3d6e39f3a1b9 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -5095,21 +5095,21 @@ SDValue SystemZTargetLowering::combineSHIFTROT(
   // value that has its last 6 bits set, we can safely remove the AND operation.
   //
   // If the AND operation doesn't have the last 6 bits set, we can't remove it
-  // entirely, but we can still truncate it to a 16-bit value with all other
-  // bits set. This will allow us to generate a NILL instead of a NILF for
-  // smaller code size.
+  // entirely, but we can still truncate it to a 16-bit value. This prevents
+  // us from ending up with a NILL with a signed operand, which will cause the
+  // instruction printer to abort.
   SDValue N1 = N->getOperand(1);
   if (N1.getOpcode() == ISD::AND) {
-    SDValue AndOp = N1->getOperand(0);
     SDValue AndMaskOp = N1->getOperand(1);
     auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
 
     // The AND mask is constant
     if (AndMask) {
-      uint64_t AmtVal = AndMask->getZExtValue();
-
+      auto AmtVal = AndMask->getZExtValue();
+      
       // Bottom 6 bits are set
       if ((AmtVal & 0x3f) == 0x3f) {
+        SDValue AndOp = N1->getOperand(0);
 
         // This is the only use, so remove the node
         if (N1.hasOneUse()) {
@@ -5129,30 +5129,25 @@ SDValue SystemZTargetLowering::combineSHIFTROT(
           return Replace;
         }
 
-      // We can't remove the AND, but we can use NILL here instead of NILF if we
-      // truncate the mask to 16 bits and set the remaining bits
-      } else {
-        unsigned BitWidth = AndMask->getAPIntValue().getBitWidth();
+      // We can't remove the AND, but we can use NILL here (normally we would
+      // use NILF). Only keep the last 16 bits of the mask. The actual
+      // transformation will be handled by .td definitions.
+      } else if (AmtVal >> 16 != 0) {
+        SDValue AndOp = N1->getOperand(0);
 
-        // All bits for the operand's size except the lower 16
-        uint64_t UpperBits = ((1ull << (uint64_t)BitWidth) - 1ull) &
-                             0xffffffffffff0000ull;
+        auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
+                                       SDLoc(AndMaskOp),
+                                       AndMaskOp.getValueType());
 
-        if ((AmtVal & UpperBits) != UpperBits) {
-          auto NewMaskValue = (AmtVal & 0xffff) | UpperBits;
+        auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
+                                  AndOp, NewMask);
 
-          auto NewMask = DAG.getConstant(NewMaskValue,
-                                         SDLoc(AndMaskOp),
-                                         AndMaskOp.getValueType());
-          auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
-                                    AndOp, NewMask);
-          auto Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                     N->getValueType(0), N->getOperand(0),
-                                     NewAnd);
-          DCI.AddToWorklist(Replace.getNode());
+        SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+                                      N->getValueType(0), N->getOperand(0),
+                                      NewAnd);
+        DCI.AddToWorklist(Replace.getNode());
 
-          return Replace;
-        }
+        return Replace;
       }
     }
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index b3088508e595..4ea3f9c5f543 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1834,6 +1834,37 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
 def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
                           (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
 
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
+// last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
+
 // Peepholes for turning scalar operations into block operations.
 defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
                       XCSequence, 1>;