From fd7f4ae6d497166a5d144d8450db84800796809d Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 1 Aug 2013 10:39:40 +0000 Subject: [PATCH] [SystemZ] Reuse CC results for integer comparisons with zero This also fixes a bug in the predication of LR to LOCR: I'd forgotten that with these in-place instruction builds, the implicit operands need to be added manually. I think this was latent until now, but is tested by int-cmp-45.c. It also adds a CC valid mask to STOC, again tested by int-cmp-45.c. llvm-svn: 187573 --- .../Target/SystemZ/SystemZISelLowering.cpp | 3 +- .../lib/Target/SystemZ/SystemZInstrFormats.td | 49 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 3 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 24 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 71 ++- llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 187 +++++- llvm/test/CodeGen/SystemZ/int-cmp-44.ll | 576 ++++++++++++++++++ llvm/test/CodeGen/SystemZ/int-cmp-45.ll | 115 ++++ 8 files changed, 969 insertions(+), 59 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-44.ll create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-45.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index ffd842d49a34..6acdcd4bef0c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1813,7 +1813,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask); + .addReg(SrcReg).addOperand(Base).addImm(Disp) + .addImm(CCValid).addImm(CCMask); MI->eraseFromParent(); return MBB; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 915891d09d7c..98837149030a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -61,12 
+61,41 @@ class InstSystemZ AccessBytes = 0; - let TSFlags{0} = SimpleBDXLoad; - let TSFlags{1} = SimpleBDXStore; - let TSFlags{2} = Has20BitOffset; - let TSFlags{3} = HasIndex; - let TSFlags{4} = Is128Bit; - let TSFlags{9-5} = AccessBytes; + // If the instruction sets CC to a useful value, this gives the mask + // of all possible CC results. The mask has the same form as + // SystemZ::CCMASK_*. + bits<4> CCValues = 0; + + // True if the instruction sets CC to 0 when the result is 0. + bit CCHasZero = 0; + + // True if the instruction sets CC to 1 when the result is less than 0 + // and to 2 when the result is greater than 0. + bit CCHasOrder = 0; + + // True if the instruction is conditional and if the CC mask operand + // comes first (as for BRC, etc.). + bit CCMaskFirst = 0; + + // Similar, but true if the CC mask operand comes last (as for LOC, etc.). + bit CCMaskLast = 0; + + // True if the instruction is the "logical" rather than "arithmetic" form, + // in cases where a distinction exists. + bit IsLogical = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; + let TSFlags{13-10} = CCValues; + let TSFlags{14} = CCHasZero; + let TSFlags{15} = CCHasOrder; + let TSFlags{16} = CCMaskFirst; + let TSFlags{17} = CCMaskLast; + let TSFlags{18} = IsLogical; } //===----------------------------------------------------------------------===// @@ -623,11 +652,12 @@ multiclass StoreSIPair siOpcode, bits<16> siyOpcode, class CondStoreRSY opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY, Requires<[FeatureLoadStoreOnCond]> { let mayStore = 1; let AccessBytes = bytes; + let CCMaskLast = 1; } // Like CondStoreRSY, but used for the raw assembly form. 
The condition-code @@ -686,7 +716,9 @@ class CondUnaryRRF opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRF, - Requires<[FeatureLoadStoreOnCond]>; + Requires<[FeatureLoadStoreOnCond]> { + let CCMaskLast = 1; +} // Like CondUnaryRRF, but used for the raw assembly form. The condition-code // mask is the third operand rather than being part of the mnemonic. @@ -748,6 +780,7 @@ class CondUnaryRSY opcode, let DisableEncoding = "$R1src"; let mayLoad = 1; let AccessBytes = bytes; + let CCMaskLast = 1; } // Like CondUnaryRSY, but used for the raw assembly form. The condition-code diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2b604a99fdb7..9913db7b0e42 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -341,7 +341,8 @@ PredicateInstruction(MachineInstr *MI, if (unsigned CondOpcode = getConditionalMove(Opcode)) { MI->setDesc(get(CondOpcode)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) - .addImm(CCValid).addImm(CCMask); + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); return true; } } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 917ac6e348e1..763a3956fc1e 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -28,17 +28,27 @@ class SystemZTargetMachine; namespace SystemZII { enum { // See comments in SystemZInstrFormats.td.
- SimpleBDXLoad = (1 << 0), - SimpleBDXStore = (1 << 1), - Has20BitOffset = (1 << 2), - HasIndex = (1 << 3), - Is128Bit = (1 << 4), - AccessSizeMask = (31 << 5), - AccessSizeShift = 5 + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5, + CCValuesMask = (15 << 10), + CCValuesShift = 10, + CCHasZero = (1 << 14), + CCHasOrder = (1 << 15), + CCMaskFirst = (1 << 16), + CCMaskLast = (1 << 17), + IsLogical = (1 << 18) }; static inline unsigned getAccessSize(unsigned int Flags) { return (Flags & AccessSizeMask) >> AccessSizeShift; } + static inline unsigned getCCValues(unsigned int Flags) { + return (Flags & CCValuesMask) >> CCValuesShift; + } // SystemZ MachineOperand target flags. enum { diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 341eb9040409..748539aa5b63 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -59,7 +59,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // the first operand. It seems friendlier to use mnemonic forms like // JE and JLH when writing out the assembly though. 
let isBranch = 1, isTerminator = 1, Uses = [CC] in { - let isCodeGenOnly = 1 in { + let isCodeGenOnly = 1, CCMaskFirst = 1 in { def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1, brtarget16:$I2), "j$R1\t$I2", [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; @@ -195,7 +195,7 @@ defm CondStore64 : CondStores; @@ -512,9 +512,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, //===----------------------------------------------------------------------===// let Defs = [CC] in { - def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; - def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; - def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; + let CCValues = 0xF, CCHasZero = 1 in { + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; } defm : SXU; @@ -566,7 +569,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), //===----------------------------------------------------------------------===// // Plain addition. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in { // Addition of a register. let isCommutable = 1 in { defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>; @@ -637,7 +640,7 @@ let Defs = [CC], Uses = [CC] in { // Plain substraction. Although immediate forms exist, we use the // add-immediate instruction instead. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in { // Subtraction of a register. defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; @@ -687,13 +690,14 @@ let Defs = [CC], Uses = [CC] in { let Defs = [CC] in { // ANDs of a register. 
- let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; } let isConvertibleToThreeAddress = 1 in { // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. let isCodeGenOnly = 1 in { def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; @@ -704,15 +708,19 @@ let Defs = [CC] in { def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; // ANDs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; } // ANDs of memory. - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + } // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; @@ -726,12 +734,13 @@ defm : RMWIByte; let Defs = [CC] in { // ORs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. 
let isCodeGenOnly = 1 in { def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; @@ -742,14 +751,18 @@ let Defs = [CC] in { def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; // ORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; // ORs of memory. - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + } // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; @@ -763,20 +776,24 @@ defm : RMWIByte; let Defs = [CC] in { // XORs of a register. - let isCommutable = 1 in { + let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in { defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; // XORs of memory. 
- defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + let CCValues = 0xC, CCHasZero = 1 in { + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + } // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; @@ -849,7 +866,7 @@ let neverHasSideEffects = 1 in { } // Arithmetic shift right. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in { defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; } @@ -862,11 +879,12 @@ let neverHasSideEffects = 1 in { // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and -// end bits (operands 2 and 3) are in the range [32, 64) +// end bits (operands 2 and 3) are in the range [32, 64). let Defs = [CC] in { let isCodeGenOnly = 1 in - def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; - def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; } // Forms of RISBG that only affect one word of the destination register. @@ -880,7 +898,8 @@ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>, Requires<[FeatureHighWord]>; // Rotate second operand left and perform a logical operation with selected -// bits of the first operand. +// bits of the first operand. The CC result only describes the selected bits, +// so isn't useful for a full comparison against zero. 
let Defs = [CC] in { def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; @@ -892,7 +911,7 @@ let Defs = [CC] in { //===----------------------------------------------------------------------===// // Signed comparisons. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE in { // Comparison with a register. def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>; def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; @@ -926,7 +945,7 @@ let Defs = [CC] in { defm : SXB; // Unsigned comparisons. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { // Comparison with a register. def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>; diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 9b637c01c6eb..f0ea3e20be61 100644 --- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -7,18 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This pass does two things: -// (1) fuse compares and branches into COMPARE AND BRANCH instructions -// (2) make sure that all branches are in range. +// This pass does three things: +// (1) try to remove compares if CC already contains the required information +// (2) fuse compares and branches into COMPARE AND BRANCH instructions +// (3) make sure that all branches are in range. // -// We do (1) here rather than earlier because the fused form prevents -// predication. +// We do (1) here rather than earlier because some transformations can +// change the set of available CC values and we generally want those +// transformations to have priority over (1). 
This is especially true in +// the commonest case where the CC value is used by a single in-range branch +// instruction, since (2) will then be able to fuse the compare and the +// branch instead. // -// Doing it so late makes it more likely that a register will be reused +// For example, two-address NILF can sometimes be converted into +// three-address RISBLG. NILF produces a CC value that indicates whether +// the low word is zero, but RISBLG does not modify CC at all. On the +// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. +// The CC value produced by NILL isn't useful for our purposes, but the +// value produced by RISBG can be used for any comparison with zero +// (not just equality). So there are some transformations that lose +// CC values (while still being worthwhile) and others that happen to make +// the CC result more useful than it was originally. +// +// We do (2) here rather than earlier because the fused form prevents +// predication. It also has to happen after (1). +// +// Doing (2) so late makes it more likely that a register will be reused // between the compare and the branch, but it isn't clear whether preventing // that would be a win or not. // -// There are several ways in which (2) could be done. One aggressive +// There are several ways in which (3) could be done. One aggressive // approach is to assume that all branches are in range and successively // replace those that turn out not to be in range with a longer form // (branch relaxation). 
A simple implementation is to continually walk @@ -156,6 +174,7 @@ namespace { void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, bool AssumeRelaxed); TerminatorInfo describeTerminator(MachineInstr *MI); + bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare); bool fuseCompareAndBranch(MachineInstr *Compare); uint64_t initMBBInfo(); bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); @@ -254,6 +273,15 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { return Terminator; } +// Return true if CC is live out of MBB. +static bool isCCLiveOut(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + // Return true if CC is live after MBBI. static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, const TargetRegisterInfo *TRI) { @@ -269,12 +297,130 @@ static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, return false; } - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; + return isCCLiveOut(MBB); +} - return false; +// Return true if all uses of the CC value produced by MBBI could make do +// with the CC values in ReusableCCMask. When returning true, point AlterMasks +// to the "CC valid" and "CC mask" operands for each condition. +static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI, + unsigned ReusableCCMask, + SmallVectorImpl &AlterMasks, + const TargetRegisterInfo *TRI) { + MachineBasicBlock *MBB = MBBI->getParent(); + MachineBasicBlock::iterator MBBE = MBB->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) { + if (MBBI->readsRegister(SystemZ::CC, TRI)) { + // Fail if this isn't a use of CC that we understand. 
+ unsigned MBBIFlags = MBBI->getDesc().TSFlags; + unsigned FirstOpNum; + if (MBBIFlags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (MBBIFlags & SystemZII::CCMaskLast) + FirstOpNum = MBBI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. + unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm(); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&MBBI->getOperand(FirstOpNum)); + AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1)); + + // Succeed if this was the final use of the CC value. + if (MBBI->killsRegister(SystemZ::CC, TRI)) + return true; + } + // Succeed if the instruction redefines CC. + if (MBBI->definesRegister(SystemZ::CC, TRI)) + return true; + } + // Fail if there are other uses of CC that we didn't see. + return !isCCLiveOut(MBB); +} + +// Try to make Compare redundant with PrevCCSetter, the previous setter of CC, +// by looking for cases where Compare compares the result of PrevCCSetter +// against zero. Return true on success and if Compare can therefore +// be deleted. +bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter, + MachineInstr *Compare) { + if (MF->getTarget().getOptLevel() == CodeGenOpt::None) + return false; + + // Check whether this is a comparison against zero. + if (Compare->getNumExplicitOperands() != 2 || + !Compare->getOperand(1).isImm() || + Compare->getOperand(1).getImm() != 0) + return false; + + // See which compare-style condition codes are available after PrevCCSetter. 
+ unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags; + unsigned ReusableCCMask = 0; + if (PrevFlags & SystemZII::CCHasZero) + ReusableCCMask |= SystemZ::CCMASK_CMP_EQ; + + // For unsigned comparisons with zero, only equality makes sense. + unsigned CompareFlags = Compare->getDesc().TSFlags; + if (!(CompareFlags & SystemZII::IsLogical) && + (PrevFlags & SystemZII::CCHasOrder)) + ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT; + + if (ReusableCCMask == 0) + return false; + + // Make sure that PrevCCSetter sets the value being compared. + unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); + if (!PrevCCSetter->getOperand(0).isReg() || + !PrevCCSetter->getOperand(0).isDef() || + PrevCCSetter->getOperand(0).getReg() != SrcReg || + PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg) + return false; + + // Make sure that SrcReg survives until Compare. + MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare; + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI->modifiesRegister(SrcReg, TRI)) + return false; + + // See whether all uses of Compare's CC value could make do with + // the values produced by PrevCCSetter. + SmallVector AlterMasks; + if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI)) + return false; + + // Alter the CC masks that canRestrictCCMask says need to be altered. + unsigned CCValues = SystemZII::getCCValues(PrevFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (CCMask & ~ReusableCCMask) + AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | + (CCValues & ~ReusableCCMask)); + } + + // CC is now live after PrevCCSetter. 
+ int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false, + true, TRI); + assert(CCDef >= 0 && "Couldn't find CC set"); + PrevCCSetter->getOperand(CCDef).setIsDead(false); + + // Clear any intervening kills of CC. + MBBI = PrevCCSetter; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + + return true; } // Try to fuse compare instruction Compare into a later branch. Return @@ -345,6 +491,8 @@ bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) { // that no branches need relaxation. Return the size of the function under // this assumption. uint64_t SystemZLongBranch::initMBBInfo() { + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + MF->RenumberBlocks(); unsigned NumBlocks = MF->size(); @@ -365,13 +513,20 @@ uint64_t SystemZLongBranch::initMBBInfo() { // Calculate the size of the fixed part of the block. MachineBasicBlock::iterator MI = MBB->begin(); MachineBasicBlock::iterator End = MBB->end(); + MachineInstr *PrevCCSetter = 0; while (MI != End && !MI->isTerminator()) { MachineInstr *Current = MI; ++MI; - if (Current->isCompare() && fuseCompareAndBranch(Current)) - Current->removeFromParent(); - else - Block.Size += TII->getInstSizeInBytes(Current); + if (Current->isCompare()) { + if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) || + fuseCompareAndBranch(Current)) { + Current->removeFromParent(); + continue; + } + } + if (Current->modifiesRegister(SystemZ::CC, TRI)) + PrevCCSetter = Current; + Block.Size += TII->getInstSizeInBytes(Current); } skipNonTerminators(Position, Block); diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-44.ll b/llvm/test/CodeGen/SystemZ/int-cmp-44.ll new file mode 100644 index 000000000000..5218d41c6ad3 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-44.ll @@ -0,0 +1,576 @@ +; Test that compares are omitted if CC already has the right value +; (z10 version).
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +declare void @foo() + +; Addition provides enough for equality comparisons with zero. First test +; the EQ case. +define i32 @f1(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f1: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with NE. +define i32 @f2(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; SLT requires a comparison. +define i32 @f3(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SLE too. +define i32 @f4(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijle %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SGT too. +define i32 @f5(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...SGE too.
+define i32 @f6(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: cijhe %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + %cmp = icmp sge i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Subtraction also provides enough for equality comparisons with zero. +define i32 @f7(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...but not for ordered comparisons. +define i32 @f8(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f8: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Logic register-register instructions also provide enough for equality +; comparisons with zero. +define i32 @f9(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK: nr %r2, %r3 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, %b + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...but not for ordered comparisons. 
+define i32 @f10(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f10: +; CHECK: nr %r2, %r3 +; CHECK-NEXT: cijl %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, %b + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Logic register-immediate instructions also provide enough for equality +; comparisons with zero if the immediate covers the whole register. +define i32 @f11(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f11: +; CHECK: nilf %r2, 100 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, 100 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Partial logic register-immediate instructions do not provide simple +; zero results. +define i32 @f12(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f12: +; CHECK: nill %r2, 65436 +; CHECK-NEXT: cijlh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i32 %a, -100 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; SRA provides the same CC result as a comparison with zero. +define i32 @f13(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f13: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with NE. +define i32 @f14(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f14: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jlh .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SLT. 
+define i32 @f15(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f15: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SLE. +define i32 @f16(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f16: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SGT. +define i32 @f17(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f17: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and SGE. +define i32 @f18(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f18: +; CHECK: sra %r2, 0(%r3) +; CHECK-NEXT: jhe .L{{.*}} +; CHECK: br %r14 +entry: + %res = ashr i32 %a, %b + %cmp = icmp sge i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; RISBG provides the same result as a comparison against zero. +; Test the EQ case. +define i64 @f19(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f19: +; CHECK: risbg %r2, %r3, 0, 190, 0 +; CHECK-NEXT: je .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i64 %b, -2 + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; ...and the SLT case. 
+define i64 @f20(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f20: +; CHECK: risbg %r2, %r3, 0, 190, 0 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = and i64 %b, -2 + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test a case where the register we're testing is set by a non-CC-clobbering +; instruction. +define i32 @f21(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f21: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %res = call i32 asm "blah $0", "=r,0" (i32 %add) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; ...and again with a CC-clobbering instruction. +define i32 @f22(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f22: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %res = call i32 asm "blah $0", "=r,0,~{cc}" (i32 %add) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Check that stores do not interfere. +define i32 @f23(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) { +; CHECK-LABEL: f23: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: st %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %res = add i32 %a, 1000000 + store i32 %res, i32 *%dest1 + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest2 + br label %exit + +exit: + ret i32 %res +} + +; Check that calls do interfere. 
+define void @f24(i32 *%ptr) { +; CHECK-LABEL: f24: +; CHECK: afi [[REG:%r[0-9]+]], 1000000 +; CHECK-NEXT: brasl %r14, foo@PLT +; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}} +; CHECK: br %r14 +entry: + %val = load i32 *%ptr + %xor = xor i32 %val, 1 + %add = add i32 %xor, 1000000 + call void @foo() + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; Check that inline asms don't interfere if they don't clobber CC. +define void @f25(i32 %a, i32 *%ptr) { +; CHECK-LABEL: f25: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + call void asm sideeffect "blah", "r"(i32 %add) + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; ...but do interfere if they do clobber CC. +define void @f26(i32 %a, i32 *%ptr) { +; CHECK-LABEL: f26: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: cijlh %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + call void asm sideeffect "blah", "r,~{cc}"(i32 %add) + %cmp = icmp ne i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %add, i32 *%ptr + br label %exit + +exit: + ret void +} + +; Test a case where CC is set based on a different register from the +; compare input. 
+define i32 @f27(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) { +; CHECK-LABEL: f27: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: sr %r3, %r2 +; CHECK-NEXT: st %r3, 0(%r4) +; CHECK-NEXT: cije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i32 %a, 1000000 + %sub = sub i32 %b, %add + store i32 %sub, i32 *%dest1 + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %sub, i32 *%dest2 + br label %exit + +exit: + ret i32 %add +} + +; Make sure that we don't confuse a base register for a destination. +define void @f28(i64 %a, i64 *%dest) { +; CHECK-LABEL: f28: +; CHECK: xi 0(%r2), 15 +; CHECK: cgije %r2, 0, .L{{.*}} +; CHECK: br %r14 +entry: + %ptr = inttoptr i64 %a to i8 * + %val = load i8 *%ptr + %xor = xor i8 %val, 15 + store i8 %xor, i8 *%ptr + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %a, i64 *%dest + br label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-45.ll b/llvm/test/CodeGen/SystemZ/int-cmp-45.ll new file mode 100644 index 000000000000..753a528e46c9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-45.ll @@ -0,0 +1,115 @@ +; Test that compares are omitted if CC already has the right value +; (z196 version). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Addition provides enough for equality comparisons with zero. First test +; the EQ case with LOC. +define i32 @f1(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f1: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: loce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %c, i32 %b + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC.
+define i32 @f2(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f2: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stoce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %b, i32 %c + store i32 %newval, i32 *%cptr + ret i32 %add +} + +; Reverse the select order and test with LOCR. +define i32 @f3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f3: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locrne %r3, %r4 +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with LOC. +define i32 @f4(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f4: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locne %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. +define i32 @f5(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f5: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stocne %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp eq i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %c, i32 %b + store i32 %newval, i32 *%cptr + ret i32 %add +} + +; Change the EQ in f3 to NE. +define i32 @f6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: f6: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: locre %r3, %r4 +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with LOC. 
+define i32 @f7(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f7: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: loce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %c = load i32 *%cptr + %arg = select i1 %cmp, i32 %b, i32 %c + call void asm sideeffect "blah $0", "{r3}"(i32 %arg) + ret i32 %add +} + +; ...and again with STOC. +define i32 @f8(i32 %a, i32 %b, i32 *%cptr) { +; CHECK-LABEL: f8: +; CHECK: afi %r2, 1000000 +; CHECK-NEXT: stoce %r3, 0(%r4) +; CHECK: br %r14 + %add = add i32 %a, 1000000 + %cmp = icmp ne i32 %add, 0 + %c = load i32 *%cptr + %newval = select i1 %cmp, i32 %c, i32 %b + store i32 %newval, i32 *%cptr + ret i32 %add +}