From fd7f4ae6d497166a5d144d8450db84800796809d Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 1 Aug 2013 10:39:40 +0000 Subject: [PATCH] [SystemZ] Reuse CC results for integer comparisons with zero This also fixes a bug in the predication of LR to LOCR: I'd forgotten that with these in-place instruction builds, the implicit operands need to be added manually. I think this was latent until now, but is tested by int-cmp-45.c. It also adds a CC valid mask to STOC, again tested by int-cmp-45.c. llvm-svn: 187573 --- .../Target/SystemZ/SystemZISelLowering.cpp | 3 +- .../lib/Target/SystemZ/SystemZInstrFormats.td | 49 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 3 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 24 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 71 ++- llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 187 +++++- llvm/test/CodeGen/SystemZ/int-cmp-44.ll | 576 ++++++++++++++++++ llvm/test/CodeGen/SystemZ/int-cmp-45.ll | 115 ++++ 8 files changed, 969 insertions(+), 59 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-44.ll create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-45.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index ffd842d49a34..6acdcd4bef0c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1813,7 +1813,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask); + .addReg(SrcReg).addOperand(Base).addImm(Disp) + .addImm(CCValid).addImm(CCMask); MI->eraseFromParent(); return MBB; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 915891d09d7c..98837149030a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -61,12 
+61,41 @@ class InstSystemZ AccessBytes = 0; - let TSFlags{0} = SimpleBDXLoad; - let TSFlags{1} = SimpleBDXStore; - let TSFlags{2} = Has20BitOffset; - let TSFlags{3} = HasIndex; - let TSFlags{4} = Is128Bit; - let TSFlags{9-5} = AccessBytes; + // If the instruction sets CC to a useful value, this gives the mask + // of all possible CC results. The mask has the same form as + // SystemZ::CCMASK_*. + bits<4> CCValues = 0; + + // True if the instruction sets CC to 0 when the result is 0. + bit CCHasZero = 0; + + // True if the instruction sets CC to 1 when the result is less than 0 + // and to 2 when the result is greater than 0. + bit CCHasOrder = 0; + + // True if the instruction is conditional and if the CC mask operand + // comes first (as for BRC, etc.). + bit CCMaskFirst = 0; + + // Similar, but true if the CC mask operand comes last (as for LOC, etc.). + bit CCMaskLast = 0; + + // True if the instruction is the "logical" rather than "arithmetic" form, + // in cases where a distinction exists. + bit IsLogical = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; + let TSFlags{13-10} = CCValues; + let TSFlags{14} = CCHasZero; + let TSFlags{15} = CCHasOrder; + let TSFlags{16} = CCMaskFirst; + let TSFlags{17} = CCMaskLast; + let TSFlags{18} = IsLogical; } //===----------------------------------------------------------------------===// @@ -623,11 +652,12 @@ multiclass StoreSIPair siOpcode, bits<16> siyOpcode, class CondStoreRSY opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY, Requires<[FeatureLoadStoreOnCond]> { let mayStore = 1; let AccessBytes = bytes; + let CCMaskLast = 1; } // Like CondStoreRSY, but used for the raw assembly form. 
The condition-code @@ -686,7 +716,9 @@ class CondUnaryRRF opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRF, - Requires<[FeatureLoadStoreOnCond]>; + Requires<[FeatureLoadStoreOnCond]> { + let CCMaskLast = 1; +} // Like CondUnaryRRF, but used for the raw assembly form. The condition-code // mask is the third operand rather than being part of the mnemonic. @@ -748,6 +780,7 @@ class CondUnaryRSY opcode, let DisableEncoding = "$R1src"; let mayLoad = 1; let AccessBytes = bytes; + let CCMaskLast = 1; } // Like CondUnaryRSY, but used for the raw assembly form. The condition-code diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2b604a99fdb7..9913db7b0e42 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -341,7 +341,8 @@ PredicateInstruction(MachineInstr *MI, if (unsigned CondOpcode = getConditionalMove(Opcode)) { MI->setDesc(get(CondOpcode)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(CCValid).addImm(CCMask); + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); return true; } } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 917ac6e348e1..763a3956fc1e 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -28,17 +28,27 @@ class SystemZTargetMachine; namespace SystemZII { enum { // See comments in SystemZInstrFormats.td.
- SimpleBDXLoad = (1 << 0), - SimpleBDXStore = (1 << 1), - Has20BitOffset = (1 << 2), - HasIndex = (1 << 3), - Is128Bit = (1 << 4), - AccessSizeMask = (31 << 5), - AccessSizeShift = 5 + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5, + CCValuesMask = (15 << 10), + CCValuesShift = 10, + CCHasZero = (1 << 14), + CCHasOrder = (1 << 15), + CCMaskFirst = (1 << 16), + CCMaskLast = (1 << 17), + IsLogical = (1 << 18) }; static inline unsigned getAccessSize(unsigned int Flags) { return (Flags & AccessSizeMask) >> AccessSizeShift; } + static inline unsigned getCCValues(unsigned int Flags) { + return (Flags & CCValuesMask) >> CCValuesShift; + } // SystemZ MachineOperand target flags. enum { diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 341eb9040409..748539aa5b63 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -59,7 +59,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // the first operand. It seems friendlier to use mnemonic forms like // JE and JLH when writing out the assembly though. 
let isBranch = 1, isTerminator = 1, Uses = [CC] in { - let isCodeGenOnly = 1 in { + let isCodeGenOnly = 1, CCMaskFirst = 1 in { def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1, brtarget16:$I2), "j$R1\t$I2", [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; @@ -195,7 +195,7 @@ defm CondStore64 : CondStores; @@ -512,9 +512,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, //===----------------------------------------------------------------------===// let Defs = [CC] in { - def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; - def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; - def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; + let CCValues = 0xF, CCHasZero = 1 in { + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; } defm : SXU; @@ -566,7 +569,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), //===----------------------------------------------------------------------===// // Plain addition. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in { // Addition of a register. let isCommutable = 1 in { defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>; @@ -637,7 +640,7 @@ let Defs = [CC], Uses = [CC] in { // Plain substraction. Although immediate forms exist, we use the // add-immediate instruction instead. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in { // Subtraction of a register. defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; @@ -687,13 +690,14 @@ let Defs = [CC], Uses = [CC] in { let Defs = [CC] in { // ANDs of a register. 
- let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; } let isConvertibleToThreeAddress = 1 in { // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. let isCodeGenOnly = 1 in { def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; @@ -704,15 +708,19 @@ let Defs = [CC] in { def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; // ANDs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; } // ANDs of memory. - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + } // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; @@ -726,12 +734,13 @@ defm : RMWIByte; let Defs = [CC] in { // ORs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. 
let isCodeGenOnly = 1 in { def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; @@ -742,14 +751,18 @@ let Defs = [CC] in { def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; // ORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; // ORs of memory. - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + } // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; @@ -763,20 +776,24 @@ defm : RMWIByte; let Defs = [CC] in { // XORs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; // XORs of memory. 
- defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + } // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; @@ -849,7 +866,7 @@ let neverHasSideEffects = 1 in { } // Arithmetic shift right. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in { defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; } @@ -862,11 +879,12 @@ let neverHasSideEffects = 1 in { // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and -// end bits (operands 2 and 3) are in the range [32, 64) +// end bits (operands 2 and 3) are in the range [32, 64). let Defs = [CC] in { let isCodeGenOnly = 1 in - def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; - def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; } // Forms of RISBG that only affect one word of the destination register. @@ -880,7 +898,8 @@ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>, Requires<[FeatureHighWord]>; // Rotate second operand left and perform a logical operation with selected -// bits of the first operand. +// bits of the first operand. The CC result only describes the selected bits, +// so isn't useful for a full comparison against zero. 
let Defs = [CC] in { def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; @@ -892,7 +911,7 @@ let Defs = [CC] in { //===----------------------------------------------------------------------===// // Signed comparisons. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE in { // Comparison with a register. def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>; def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; @@ -926,7 +945,7 @@ let Defs = [CC] in { defm : SXB; // Unsigned comparisons. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { // Comparison with a register. def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>; diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 9b637c01c6eb..f0ea3e20be61 100644 --- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -7,18 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This pass does two things: -// (1) fuse compares and branches into COMPARE AND BRANCH instructions -// (2) make sure that all branches are in range. +// This pass does three things: +// (1) try to remove compares if CC already contains the required information +// (2) fuse compares and branches into COMPARE AND BRANCH instructions +// (3) make sure that all branches are in range. // -// We do (1) here rather than earlier because the fused form prevents -// predication. +// We do (1) here rather than earlier because some transformations can +// change the set of available CC values and we generally want those +// transformations to have priority over (1). 
This is especially true in +// the commonest case where the CC value is used by a single in-range branch +// instruction, since (2) will then be able to fuse the compare and the +// branch instead. // -// Doing it so late makes it more likely that a register will be reused +// For example, two-address NILF can sometimes be converted into +// three-address RISBLG. NILF produces a CC value that indicates whether +// the low word is zero, but RISBLG does not modify CC at all. On the +// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. +// The CC value produced by NILL isn't useful for our purposes, but the +// value produced by RISBG can be used for any comparison with zero +// (not just equality). So there are some transformations that lose +// CC values (while still being worthwhile) and others that happen to make +// the CC result more useful than it was originally. +// +// We do (2) here rather than earlier because the fused form prevents +// predication. It also has to happen after (1). +// +// Doing (2) so late makes it more likely that a register will be reused // between the compare and the branch, but it isn't clear whether preventing // that would be a win or not. // -// There are several ways in which (2) could be done. One aggressive +// There are several ways in which (3) could be done. One aggressive // approach is to assume that all branches are in range and successively // replace those that turn out not to be in range with a longer form // (branch relaxation). 
A simple implementation is to continually walk @@ -156,6 +174,7 @@ namespace { void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, bool AssumeRelaxed); TerminatorInfo describeTerminator(MachineInstr *MI); + bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare); bool fuseCompareAndBranch(MachineInstr *Compare); uint64_t initMBBInfo(); bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); @@ -254,6 +273,15 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { return Terminator; } +// Return true if CC is live out of MBB. +static bool isCCLiveOut(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + // Return true if CC is live after MBBI. static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, const TargetRegisterInfo *TRI) { @@ -269,12 +297,130 @@ static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, return false; } - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; + return isCCLiveOut(MBB); +} - return false; +// Return true if all uses of the CC value produced by MBBI could make do +// with the CC values in ReusableCCMask. When returning true, point AlterMasks +// to the "CC valid" and "CC mask" operands for each condition. +static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI, + unsigned ReusableCCMask, + SmallVectorImpl &AlterMasks, + const TargetRegisterInfo *TRI) { + MachineBasicBlock *MBB = MBBI->getParent(); + MachineBasicBlock::iterator MBBE = MBB->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) { + if (MBBI->readsRegister(SystemZ::CC, TRI)) { + // Fail if this isn't a use of CC that we understand. 
+ unsigned MBBIFlags = MBBI->getDesc().TSFlags; + unsigned FirstOpNum; + if (MBBIFlags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (MBBIFlags & SystemZII::CCMaskLast) + FirstOpNum = MBBI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. + unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm(); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&MBBI->getOperand(FirstOpNum)); + AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1)); + + // Succeed if this was the final use of the CC value. + if (MBBI->killsRegister(SystemZ::CC, TRI)) + return true; + } + // Succeed if the instruction redefines CC. + if (MBBI->definesRegister(SystemZ::CC, TRI)) + return true; + } + // Fail if there are other uses of CC that we didn't see. + return !isCCLiveOut(MBB); +} + +// Try to make Compare redundant with PrevCCSetter, the previous setter of CC, +// by looking for cases where Compare compares the result of PrevCCSetter +// against zero. Return true on success and if Compare can therefore +// be deleted. +bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter, + MachineInstr *Compare) { + if (MF->getTarget().getOptLevel() == CodeGenOpt::None) + return false; + + // Check whether this is a comparison against zero. + if (Compare->getNumExplicitOperands() != 2 || + !Compare->getOperand(1).isImm() || + Compare->getOperand(1).getImm() != 0) + return false; + + // See which compare-style condition codes are available after PrevCCSetter. 
+ unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags; + unsigned ReusableCCMask = 0; + if (PrevFlags & SystemZII::CCHasZero) + ReusableCCMask |= SystemZ::CCMASK_CMP_EQ; + + // For unsigned comparisons with zero, only equality makes sense. + unsigned CompareFlags = Compare->getDesc().TSFlags; + if (!(CompareFlags & SystemZII::IsLogical) && + (PrevFlags & SystemZII::CCHasOrder)) + ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT; + + if (ReusableCCMask == 0) + return false; + + // Make sure that PrevCCSetter sets the value being compared. + unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); + if (!PrevCCSetter->getOperand(0).isReg() || + !PrevCCSetter->getOperand(0).isDef() || + PrevCCSetter->getOperand(0).getReg() != SrcReg || + PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg) + return false; + + // Make sure that SrcReg survives until Compare. + MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare; + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI->modifiesRegister(SrcReg, TRI)) + return false; + + // See whether all uses of Compare's CC value could make do with + // the values produced by PrevCCSetter. + SmallVector AlterMasks; + if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI)) + return false; + + // Alter the CC masks that canRestrictCCMask says need to be altered. + unsigned CCValues = SystemZII::getCCValues(PrevFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (CCMask & ~ReusableCCMask) + AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | + (CCValues & ~ReusableCCMask)); + } + + // CC is now live after PrevCCSetter. 
+ int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false, + true, TRI); + assert(CCDef >= 0 && "Couldn't find CC set"); + PrevCCSetter->getOperand(CCDef).setIsDead(false); + + // Clear any intervening kills of CC. + MBBI = PrevCCSetter; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + + return true; } // Try to fuse compare instruction Compare into a later branch. Return @@ -345,6 +491,8 @@ bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) { // that no branches need relaxation. Return the size of the function under // this assumption. uint64_t SystemZLongBranch::initMBBInfo() { + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + MF->RenumberBlocks(); unsigned NumBlocks = MF->size(); @@ -365,13 +513,20 @@ uint64_t SystemZLongBranch::initMBBInfo() { // Calculate the size of the fixed part of the block. MachineBasicBlock::iterator MI = MBB->begin(); MachineBasicBlock::iterator End = MBB->end(); + MachineInstr *PrevCCSetter = 0; while (MI != End && !MI->isTerminator()) { MachineInstr *Current = MI; ++MI; - if (Current->isCompare() && fuseCompareAndBranch(Current)) - Current->removeFromParent(); - else - Block.Size += TII->getInstSizeInBytes(Current); + if (Current->isCompare()) { + if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) || + fuseCompareAndBranch(Current)) { + Current->removeFromParent(); + continue; + } + } + if (Current->modifiesRegister(SystemZ::CC, TRI)) + PrevCCSetter = Current; + Block.Size += TII->getInstSizeInBytes(Current); } skipNonTerminators(Position, Block); diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-44.ll b/llvm/test/CodeGen/SystemZ/int-cmp-44.ll new file mode 100644 index 000000000000..5218d41c6ad3 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-44.ll @@ -0,0 +1,576 @@ +; Test that compares are omitted if CC already has the right value +; (z10 version).
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo() + +; Addition provides enough for equality comparisons with zero. First test +; the EQ case. +define i32 @f1(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f1: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with NE. +define i32 @f2(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; SLT requires a comparison. +define i32 @f3(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SLE too. +define i32 @f4(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijle %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SGT too. +define i32 @f5(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SGE too.
+define i32 @f6(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijhe %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sge i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Subtraction also provides enough for equality comparisons with zero. +define i32 @f7(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...but not for ordered comparisons. +define i32 @f8(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f8: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Logic register-register instructions also provide enough for equality +; comparisons with zero. +define i32 @f9(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK: nr %r2, %r3 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, %b + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...but not for ordered comparisons. 
+define i32 @f10(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f10: +; CHECK: nr %r2, %r3 +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, %b + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Logic register-immediate instructions also provide enough for equality +; comparisons with zero if the immediate covers the whole register. +define i32 @f11(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f11: +; CHECK: nilf %r2, 100 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, 100 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Partial logic register-immediate instructions do not provide simple +; zero results. +define i32 @f12(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f12: +; CHECK: nill %r2, 65436 +; CHECK-NEXT: cijlh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, -100 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; SRA provides the same CC result as a comparison with zero. +define i32 @f13(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f13: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with NE. +define i32 @f14(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f14: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jlh .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SLT. 
+define i32 @f15(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f15: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SLE. +define i32 @f16(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f16: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SGT. +define i32 @f17(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f17: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SGE. +define i32 @f18(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f18: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jhe .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sge i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; RISBG provides the same result as a comparison against zero. +; Test the EQ case. +define i64 @f19(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f19: +; CHECK: risbg %r2, %r3, 0, 190, 0 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i64 %b, -2 + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; ...and the SLT case. 
+define i64 @f20(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f20: +; CHECK: risbg %r2, %r3, 0, 190, 0 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i64 %b, -2 + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test a case where the register we're testing is set by a non-CC-clobbering +; instruction. +define i32 @f21(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f21: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %res = call i32 asm "blah $0", "=r,0" (i32 %add) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with a CC-clobbering instruction. +define i32 @f22(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f22: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %res = call i32 asm "blah $0", "=r,0,~{cc}" (i32 %add) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Check that stores do not interfere. +define i32 @f23(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) { +; CHECK-LABEL: f23: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: st %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + store i32 %res, i32 *%dest1 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest2 + br label %exit + +exit: + ret i32 %res +} + +; Check that calls do interfere. 
+define void @f24(i32 *%ptr) { +; CHECK-LABEL: f24: +; CHECK: afi [[REG:%r[0-9]+]], 1000000 +; CHECK-NEXT: brasl %r14, foo@PLT +; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}} +; CHECK: br %r14 +entry: + %val = load i32 *%ptr + %xor = xor i32 %val, 1 + %add = add i32 %xor, 1000000 + call void @foo() + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; Check that inline asms don't interfere if they don't clobber CC. +define void @f25(i32 %a, i32 *%ptr) { +; CHECK-LABEL: f25: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + call void asm sideeffect "blah", "r"(i32 %add) + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; ...but do interfere if they do clobber CC. +define void @f26(i32 %a, i32 *%ptr) { +; CHECK-LABEL: f26: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cijlh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + call void asm sideeffect "blah", "r,~{cc}"(i32 %add) + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; Test a case where CC is set based on a different register from the +; compare input. 
+define i32 @f27(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) { +; CHECK-LABEL: f27: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: sr %r3, %r2 +; CHECK-NEXT: st %r3, 0(%r4) +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %sub = sub i32 %b, %add + store i32 %sub, i32 *%dest1 + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %sub, i32 *%dest2 + br label %exit + +exit: + ret i32 %add +} + +; Make sure that we don't confuse a base register for a destination. +define void @f28(i64 %a, i64 *%dest) { +; CHECK-LABEL: f28: +; CHECK: xi 0(%r2), 15 +; CHECK: cgije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %ptr = inttoptr i64 %a to i8 * + %val = load i8 *%ptr + %xor = xor i8 %val, 15 + store i8 %xor, i8 *%ptr + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %a, i64 *%dest + br label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-45.ll b/llvm/test/CodeGen/SystemZ/int-cmp-45.ll new file mode 100644 index 000000000000..753a528e46c9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-45.ll @@ -0,0 +1,115 @@ +; Test that compares are omitted if CC already has the right value +; (z196 version). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Addition provides enough for equality comparisons with zero. First test +; the EQ case with LOC. +define i32 @f1(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f1: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: loce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %c, i32 %b + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC.
+define i32 @f2(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f2: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stoce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %b, i32 %c + store i32 %newval, i32 *%cptr + ret i32 %add +} + +; Reverse the select order and test with LOCR. +define i32 @f3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f3: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locrne %r3, %r4 +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with LOC. +define i32 @f4(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f4: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locne %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. +define i32 @f5(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f5: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stocne %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %c, i32 %b + store i32 %newval, i32 *%cptr + ret i32 %add +} + +; Change the EQ in f3 to NE. +define i32 @f6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f6: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locre %r3, %r4 +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with LOC. 
+define i32 @f7(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f7: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: loce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. +define i32 @f8(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f8: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stoce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %c, i32 %b + store i32 %newval, i32 *%cptr + ret i32 %add +}