forked from OSchip/llvm-project
[SystemZ] Reuse CC results for integer comparisons with zero
This also fixes a bug in the predication of LR to LOCR: I'd forgotten that with these in-place instruction builds, the implicit operands need to be added manually. I think this was latent until now, but is tested by int-cmp-45.c. It also adds a CC valid mask to STOC, again tested by int-cmp-45.c. llvm-svn: 187573
This commit is contained in:
parent
8698af49fc
commit
fd7f4ae6d4
|
@ -1813,7 +1813,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
|
|||
if (Invert)
|
||||
CCMask ^= CCValid;
|
||||
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
|
||||
.addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask);
|
||||
.addReg(SrcReg).addOperand(Base).addImm(Disp)
|
||||
.addImm(CCValid).addImm(CCMask);
|
||||
MI->eraseFromParent();
|
||||
return MBB;
|
||||
}
|
||||
|
|
|
@ -61,12 +61,41 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
|
|||
// The access size of all memory operands in bytes, or 0 if not known.
|
||||
bits<5> AccessBytes = 0;
|
||||
|
||||
let TSFlags{0} = SimpleBDXLoad;
|
||||
let TSFlags{1} = SimpleBDXStore;
|
||||
let TSFlags{2} = Has20BitOffset;
|
||||
let TSFlags{3} = HasIndex;
|
||||
let TSFlags{4} = Is128Bit;
|
||||
let TSFlags{9-5} = AccessBytes;
|
||||
// If the instruction sets CC to a useful value, this gives the mask
|
||||
// of all possible CC results. The mask has the same form as
|
||||
// SystemZ::CCMASK_*.
|
||||
bits<4> CCValues = 0;
|
||||
|
||||
// True if the instruction sets CC to 0 when the result is 0.
|
||||
bit CCHasZero = 0;
|
||||
|
||||
// True if the instruction sets CC to 1 when the result is less than 0
|
||||
// and to 2 when the result is greater than 0.
|
||||
bit CCHasOrder = 0;
|
||||
|
||||
// True if the instruction is conditional and if the CC mask operand
|
||||
// comes first (as for BRC, etc.).
|
||||
bit CCMaskFirst = 0;
|
||||
|
||||
// Similar, but true if the CC mask operand comes last (as for LOC, etc.).
|
||||
bit CCMaskLast = 0;
|
||||
|
||||
// True if the instruction is the "logical" rather than "arithmetic" form,
|
||||
// in cases where a distinction exists.
|
||||
bit IsLogical = 0;
|
||||
|
||||
let TSFlags{0} = SimpleBDXLoad;
|
||||
let TSFlags{1} = SimpleBDXStore;
|
||||
let TSFlags{2} = Has20BitOffset;
|
||||
let TSFlags{3} = HasIndex;
|
||||
let TSFlags{4} = Is128Bit;
|
||||
let TSFlags{9-5} = AccessBytes;
|
||||
let TSFlags{13-10} = CCValues;
|
||||
let TSFlags{14} = CCHasZero;
|
||||
let TSFlags{15} = CCHasOrder;
|
||||
let TSFlags{16} = CCMaskFirst;
|
||||
let TSFlags{17} = CCMaskLast;
|
||||
let TSFlags{18} = IsLogical;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -623,11 +652,12 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
|
|||
class CondStoreRSY<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, bits<5> bytes,
|
||||
AddressingMode mode = bdaddr20only>
|
||||
: InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$R3),
|
||||
: InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3),
|
||||
mnemonic#"$R3\t$R1, $BD2", []>,
|
||||
Requires<[FeatureLoadStoreOnCond]> {
|
||||
let mayStore = 1;
|
||||
let AccessBytes = bytes;
|
||||
let CCMaskLast = 1;
|
||||
}
|
||||
|
||||
// Like CondStoreRSY, but used for the raw assembly form. The condition-code
|
||||
|
@ -686,7 +716,9 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
|
|||
RegisterOperand cls2>
|
||||
: InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3),
|
||||
mnemonic#"r$R3\t$R1, $R2", []>,
|
||||
Requires<[FeatureLoadStoreOnCond]>;
|
||||
Requires<[FeatureLoadStoreOnCond]> {
|
||||
let CCMaskLast = 1;
|
||||
}
|
||||
|
||||
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
|
||||
// mask is the third operand rather than being part of the mnemonic.
|
||||
|
@ -748,6 +780,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
|
|||
let DisableEncoding = "$R1src";
|
||||
let mayLoad = 1;
|
||||
let AccessBytes = bytes;
|
||||
let CCMaskLast = 1;
|
||||
}
|
||||
|
||||
// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
|
||||
|
|
|
@ -341,7 +341,8 @@ PredicateInstruction(MachineInstr *MI,
|
|||
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
|
||||
MI->setDesc(get(CondOpcode));
|
||||
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
|
||||
.addImm(CCValid).addImm(CCMask);
|
||||
.addImm(CCValid).addImm(CCMask)
|
||||
.addReg(SystemZ::CC, RegState::Implicit);;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,17 +28,27 @@ class SystemZTargetMachine;
|
|||
namespace SystemZII {
|
||||
enum {
|
||||
// See comments in SystemZInstrFormats.td.
|
||||
SimpleBDXLoad = (1 << 0),
|
||||
SimpleBDXStore = (1 << 1),
|
||||
Has20BitOffset = (1 << 2),
|
||||
HasIndex = (1 << 3),
|
||||
Is128Bit = (1 << 4),
|
||||
AccessSizeMask = (31 << 5),
|
||||
AccessSizeShift = 5
|
||||
SimpleBDXLoad = (1 << 0),
|
||||
SimpleBDXStore = (1 << 1),
|
||||
Has20BitOffset = (1 << 2),
|
||||
HasIndex = (1 << 3),
|
||||
Is128Bit = (1 << 4),
|
||||
AccessSizeMask = (31 << 5),
|
||||
AccessSizeShift = 5,
|
||||
CCValuesMask = (15 << 10),
|
||||
CCValuesShift = 10,
|
||||
CCHasZero = (1 << 14),
|
||||
CCHasOrder = (1 << 15),
|
||||
CCMaskFirst = (1 << 16),
|
||||
CCMaskLast = (1 << 17),
|
||||
IsLogical = (1 << 18)
|
||||
};
|
||||
static inline unsigned getAccessSize(unsigned int Flags) {
|
||||
return (Flags & AccessSizeMask) >> AccessSizeShift;
|
||||
}
|
||||
// Extract the 4-bit CCValues field from an instruction's TSFlags word.
// The field is selected with CCValuesMask and shifted down by CCValuesShift,
// so the result is a value in the range [0, 15].
static inline unsigned getCCValues(unsigned int Flags) {
|
||||
return (Flags & CCValuesMask) >> CCValuesShift;
|
||||
}
|
||||
|
||||
// SystemZ MachineOperand target flags.
|
||||
enum {
|
||||
|
|
|
@ -59,7 +59,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
|
|||
// the first operand. It seems friendlier to use mnemonic forms like
|
||||
// JE and JLH when writing out the assembly though.
|
||||
let isBranch = 1, isTerminator = 1, Uses = [CC] in {
|
||||
let isCodeGenOnly = 1 in {
|
||||
let isCodeGenOnly = 1, CCMaskFirst = 1 in {
|
||||
def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1,
|
||||
brtarget16:$I2), "j$R1\t$I2",
|
||||
[(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
|
||||
|
@ -195,7 +195,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
|
|||
|
||||
// The definitions here are for the call-clobbered registers.
|
||||
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
|
||||
F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
|
||||
F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC],
|
||||
R1 = 14, isCodeGenOnly = 1 in {
|
||||
def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops),
|
||||
"bras\t%r14, $I2", []>;
|
||||
|
@ -512,9 +512,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Defs = [CC] in {
|
||||
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
|
||||
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
|
||||
def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
|
||||
let CCValues = 0xF, CCHasZero = 1 in {
|
||||
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
|
||||
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
|
||||
}
|
||||
let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
|
||||
def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
|
||||
}
|
||||
defm : SXU<ineg, LCGFR>;
|
||||
|
||||
|
@ -566,7 +569,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Plain addition.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
|
||||
// Addition of a register.
|
||||
let isCommutable = 1 in {
|
||||
defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
|
||||
|
@ -637,7 +640,7 @@ let Defs = [CC], Uses = [CC] in {
|
|||
|
||||
// Plain subtraction. Although immediate forms exist, we use the
|
||||
// add-immediate instruction instead.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
|
||||
// Subtraction of a register.
|
||||
defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
|
||||
def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
|
||||
|
@ -687,13 +690,14 @@ let Defs = [CC], Uses = [CC] in {
|
|||
|
||||
let Defs = [CC] in {
|
||||
// ANDs of a register.
|
||||
let isCommutable = 1 in {
|
||||
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
|
||||
defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
|
||||
defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
|
||||
}
|
||||
|
||||
let isConvertibleToThreeAddress = 1 in {
|
||||
// ANDs of a 16-bit immediate, leaving other bits unaffected.
|
||||
// The CC result only reflects the 16-bit field, not the full register.
|
||||
let isCodeGenOnly = 1 in {
|
||||
def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
|
||||
def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
|
||||
|
@ -704,15 +708,19 @@ let Defs = [CC] in {
|
|||
def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
|
||||
|
||||
// ANDs of a 32-bit immediate, leaving other bits unaffected.
|
||||
let isCodeGenOnly = 1 in
|
||||
// The CC result only reflects the 32-bit field, which means we can
|
||||
// use it as a zero indicator for i32 operations but not otherwise.
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
|
||||
def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
|
||||
def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
|
||||
def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
|
||||
}
|
||||
|
||||
// ANDs of memory.
|
||||
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
|
||||
def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
|
||||
let CCValues = 0xC, CCHasZero = 1 in {
|
||||
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
|
||||
def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
|
||||
}
|
||||
|
||||
// AND to memory
|
||||
defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
|
||||
|
@ -726,12 +734,13 @@ defm : RMWIByte<and, bdaddr20pair, NIY>;
|
|||
|
||||
let Defs = [CC] in {
|
||||
// ORs of a register.
|
||||
let isCommutable = 1 in {
|
||||
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
|
||||
defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
|
||||
defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
|
||||
}
|
||||
|
||||
// ORs of a 16-bit immediate, leaving other bits unaffected.
|
||||
// The CC result only reflects the 16-bit field, not the full register.
|
||||
let isCodeGenOnly = 1 in {
|
||||
def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
|
||||
def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
|
||||
|
@ -742,14 +751,18 @@ let Defs = [CC] in {
|
|||
def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
|
||||
|
||||
// ORs of a 32-bit immediate, leaving other bits unaffected.
|
||||
let isCodeGenOnly = 1 in
|
||||
// The CC result only reflects the 32-bit field, which means we can
|
||||
// use it as a zero indicator for i32 operations but not otherwise.
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
|
||||
def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
|
||||
def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
|
||||
def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
|
||||
|
||||
// ORs of memory.
|
||||
defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
|
||||
def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
|
||||
let CCValues = 0xC, CCHasZero = 1 in {
|
||||
defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
|
||||
def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
|
||||
}
|
||||
|
||||
// OR to memory
|
||||
defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
|
||||
|
@ -763,20 +776,24 @@ defm : RMWIByte<or, bdaddr20pair, OIY>;
|
|||
|
||||
let Defs = [CC] in {
|
||||
// XORs of a register.
|
||||
let isCommutable = 1 in {
|
||||
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
|
||||
defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
|
||||
defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
|
||||
}
|
||||
|
||||
// XORs of a 32-bit immediate, leaving other bits unaffected.
|
||||
let isCodeGenOnly = 1 in
|
||||
// The CC result only reflects the 32-bit field, which means we can
|
||||
// use it as a zero indicator for i32 operations but not otherwise.
|
||||
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
|
||||
def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
|
||||
def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
|
||||
def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
|
||||
|
||||
// XORs of memory.
|
||||
defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
|
||||
def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
|
||||
let CCValues = 0xC, CCHasZero = 1 in {
|
||||
defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
|
||||
def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
|
||||
}
|
||||
|
||||
// XOR to memory
|
||||
defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
|
||||
|
@ -849,7 +866,7 @@ let neverHasSideEffects = 1 in {
|
|||
}
|
||||
|
||||
// Arithmetic shift right.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
|
||||
defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
|
||||
def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
|
||||
}
|
||||
|
@ -862,11 +879,12 @@ let neverHasSideEffects = 1 in {
|
|||
|
||||
// Rotate second operand left and insert selected bits into first operand.
|
||||
// These can act like 32-bit operands provided that the constant start and
|
||||
// end bits (operands 2 and 3) are in the range [32, 64)
|
||||
// end bits (operands 2 and 3) are in the range [32, 64).
|
||||
let Defs = [CC] in {
|
||||
let isCodeGenOnly = 1 in
|
||||
def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
|
||||
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
|
||||
def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
|
||||
let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
|
||||
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
|
||||
}
|
||||
|
||||
// Forms of RISBG that only affect one word of the destination register.
|
||||
|
@ -880,7 +898,8 @@ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,
|
|||
Requires<[FeatureHighWord]>;
|
||||
|
||||
// Rotate second operand left and perform a logical operation with selected
|
||||
// bits of the first operand.
|
||||
// bits of the first operand. The CC result only describes the selected bits,
|
||||
// so isn't useful for a full comparison against zero.
|
||||
let Defs = [CC] in {
|
||||
def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
|
||||
def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
|
||||
|
@ -892,7 +911,7 @@ let Defs = [CC] in {
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Signed comparisons.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xE in {
|
||||
// Comparison with a register.
|
||||
def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>;
|
||||
def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
|
||||
|
@ -926,7 +945,7 @@ let Defs = [CC] in {
|
|||
defm : SXB<z_cmp, GR64, CGFR>;
|
||||
|
||||
// Unsigned comparisons.
|
||||
let Defs = [CC] in {
|
||||
let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
|
||||
// Comparison with a register.
|
||||
def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>;
|
||||
def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
|
||||
|
|
|
@ -7,18 +7,36 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass does two things:
|
||||
// (1) fuse compares and branches into COMPARE AND BRANCH instructions
|
||||
// (2) make sure that all branches are in range.
|
||||
// This pass does three things:
|
||||
// (1) try to remove compares if CC already contains the required information
|
||||
// (2) fuse compares and branches into COMPARE AND BRANCH instructions
|
||||
// (3) make sure that all branches are in range.
|
||||
//
|
||||
// We do (1) here rather than earlier because the fused form prevents
|
||||
// predication.
|
||||
// We do (1) here rather than earlier because some transformations can
|
||||
// change the set of available CC values and we generally want those
|
||||
// transformations to have priority over (1). This is especially true in
|
||||
// the commonest case where the CC value is used by a single in-range branch
|
||||
// instruction, since (2) will then be able to fuse the compare and the
|
||||
// branch instead.
|
||||
//
|
||||
// Doing it so late makes it more likely that a register will be reused
|
||||
// For example, two-address NILF can sometimes be converted into
|
||||
// three-address RISBLG. NILF produces a CC value that indicates whether
|
||||
// the low word is zero, but RISBLG does not modify CC at all. On the
|
||||
// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
|
||||
// The CC value produced by NILL isn't useful for our purposes, but the
|
||||
// value produced by RISBG can be used for any comparison with zero
|
||||
// (not just equality). So there are some transformations that lose
|
||||
// CC values (while still being worthwhile) and others that happen to make
|
||||
// the CC result more useful than it was originally.
|
||||
//
|
||||
// We do (2) here rather than earlier because the fused form prevents
|
||||
// predication. It also has to happen after (1).
|
||||
//
|
||||
// Doing (2) so late makes it more likely that a register will be reused
|
||||
// between the compare and the branch, but it isn't clear whether preventing
|
||||
// that would be a win or not.
|
||||
//
|
||||
// There are several ways in which (2) could be done. One aggressive
|
||||
// There are several ways in which (3) could be done. One aggressive
|
||||
// approach is to assume that all branches are in range and successively
|
||||
// replace those that turn out not to be in range with a longer form
|
||||
// (branch relaxation). A simple implementation is to continually walk
|
||||
|
@ -156,6 +174,7 @@ namespace {
|
|||
void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
|
||||
bool AssumeRelaxed);
|
||||
TerminatorInfo describeTerminator(MachineInstr *MI);
|
||||
bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare);
|
||||
bool fuseCompareAndBranch(MachineInstr *Compare);
|
||||
uint64_t initMBBInfo();
|
||||
bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
|
||||
|
@ -254,6 +273,15 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
|
|||
return Terminator;
|
||||
}
|
||||
|
||||
// Return true if CC is live out of MBB.
|
||||
// MBB: the block whose successors are inspected.
// Returns true as soon as one successor lists SystemZ::CC as a live-in.
static bool isCCLiveOut(MachineBasicBlock *MBB) {
|
||||
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
|
||||
SE = MBB->succ_end(); SI != SE; ++SI)
|
||||
if ((*SI)->isLiveIn(SystemZ::CC))
|
||||
return true;
|
||||
// No successor has CC as a live-in.
return false;
|
||||
}
|
||||
|
||||
// Return true if CC is live after MBBI.
|
||||
static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
|
@ -269,12 +297,130 @@ static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
|
|||
return false;
|
||||
}
|
||||
|
||||
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
|
||||
SE = MBB->succ_end(); SI != SE; ++SI)
|
||||
if ((*SI)->isLiveIn(SystemZ::CC))
|
||||
return true;
|
||||
return isCCLiveOut(MBB);
|
||||
}
|
||||
|
||||
return false;
|
||||
// Return true if all uses of the CC value produced by MBBI could make do
|
||||
// with the CC values in ReusableCCMask. When returning true, point AlterMasks
|
||||
// to the "CC valid" and "CC mask" operands for each condition.
|
||||
// MBBI:           the instruction whose CC result is being considered; the
//                 scan starts at the instruction after it and stays inside
//                 MBBI's own block.
// ReusableCCMask: the CC values that keep their meaning.
// AlterMasks:     receives operand pointers in pairs, "CC valid" operand
//                 first and "CC mask" operand second, one pair per CC user.
//                 Pairs pushed for earlier users are left in place if a
//                 later user makes the function return false.
// TRI:            register info used for the CC read/kill/def queries.
static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI,
|
||||
unsigned ReusableCCMask,
|
||||
SmallVectorImpl<MachineOperand *> &AlterMasks,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
MachineBasicBlock *MBB = MBBI->getParent();
|
||||
MachineBasicBlock::iterator MBBE = MBB->end();
|
||||
for (++MBBI; MBBI != MBBE; ++MBBI) {
|
||||
if (MBBI->readsRegister(SystemZ::CC, TRI)) {
|
||||
// Fail if this isn't a use of CC that we understand.
|
||||
unsigned MBBIFlags = MBBI->getDesc().TSFlags;
|
||||
// FirstOpNum is the index of the "CC valid" operand; the "CC mask"
// operand is the one immediately after it.
unsigned FirstOpNum;
|
||||
if (MBBIFlags & SystemZII::CCMaskFirst)
|
||||
FirstOpNum = 0;
|
||||
else if (MBBIFlags & SystemZII::CCMaskLast)
|
||||
// The pair occupies the last two explicit operands.
FirstOpNum = MBBI->getNumExplicitOperands() - 2;
|
||||
else
|
||||
return false;
|
||||
|
||||
// Check whether the instruction predicate treats all CC values
|
||||
// outside of ReusableCCMask in the same way. In that case it
|
||||
// doesn't matter what those CC values mean.
|
||||
unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm();
|
||||
unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm();
|
||||
unsigned OutValid = ~ReusableCCMask & CCValid;
|
||||
unsigned OutMask = ~ReusableCCMask & CCMask;
|
||||
// OutMask == 0 means the predicate rejects every non-reusable value;
// OutMask == OutValid means it accepts every valid non-reusable value.
// Any other combination distinguishes between them, so give up.
if (OutMask != 0 && OutMask != OutValid)
|
||||
return false;
|
||||
|
||||
AlterMasks.push_back(&MBBI->getOperand(FirstOpNum));
|
||||
AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1));
|
||||
|
||||
// Succeed if this was the final use of the CC value.
|
||||
if (MBBI->killsRegister(SystemZ::CC, TRI))
|
||||
return true;
|
||||
}
|
||||
// Succeed if the instruction redefines CC.
|
||||
if (MBBI->definesRegister(SystemZ::CC, TRI))
|
||||
return true;
|
||||
}
|
||||
// Fail if there are other uses of CC that we didn't see.
|
||||
return !isCCLiveOut(MBB);
|
||||
}
|
||||
|
||||
// Try to make Compare redundant with PrevCCSetter, the previous setter of CC,
|
||||
// by looking for cases where Compare compares the result of PrevCCSetter
|
||||
// against zero. Return true on success, in which case Compare can
|
||||
// be deleted.
|
||||
// PrevCCSetter: candidate instruction whose CC result might replace Compare's.
// Compare:      the compare instruction to make redundant.
// This function does not remove Compare itself. On success it has rewritten
// the CC operands of Compare's users and updated the CC dead/kill flags
// between PrevCCSetter and Compare.
bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter,
|
||||
MachineInstr *Compare) {
|
||||
// Do nothing when optimization is disabled.
if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
|
||||
return false;
|
||||
|
||||
// Check whether this is a comparison against zero.
|
||||
if (Compare->getNumExplicitOperands() != 2 ||
|
||||
!Compare->getOperand(1).isImm() ||
|
||||
Compare->getOperand(1).getImm() != 0)
|
||||
return false;
|
||||
|
||||
// See which compare-style condition codes are available after PrevCCSetter.
|
||||
unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags;
|
||||
unsigned ReusableCCMask = 0;
|
||||
if (PrevFlags & SystemZII::CCHasZero)
|
||||
ReusableCCMask |= SystemZ::CCMASK_CMP_EQ;
|
||||
|
||||
// For unsigned comparisons with zero, only equality makes sense.
|
||||
unsigned CompareFlags = Compare->getDesc().TSFlags;
|
||||
if (!(CompareFlags & SystemZII::IsLogical) &&
|
||||
(PrevFlags & SystemZII::CCHasOrder))
|
||||
ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
|
||||
|
||||
// PrevCCSetter provides nothing that Compare's users could reuse.
if (ReusableCCMask == 0)
|
||||
return false;
|
||||
|
||||
// Make sure that PrevCCSetter sets the value being compared.
|
||||
unsigned SrcReg = Compare->getOperand(0).getReg();
|
||||
unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
|
||||
if (!PrevCCSetter->getOperand(0).isReg() ||
|
||||
!PrevCCSetter->getOperand(0).isDef() ||
|
||||
PrevCCSetter->getOperand(0).getReg() != SrcReg ||
|
||||
PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg)
|
||||
return false;
|
||||
|
||||
// Make sure that SrcReg survives until Compare.
|
||||
MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare;
|
||||
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
|
||||
for (++MBBI; MBBI != MBBE; ++MBBI)
|
||||
if (MBBI->modifiesRegister(SrcReg, TRI))
|
||||
return false;
|
||||
|
||||
// See whether all uses of Compare's CC value could make do with
|
||||
// the values produced by PrevCCSetter.
|
||||
SmallVector<MachineOperand *, 4> AlterMasks;
|
||||
if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI))
|
||||
return false;
|
||||
|
||||
// Alter the CC masks that canRestrictCCMask says need to be altered.
|
||||
unsigned CCValues = SystemZII::getCCValues(PrevFlags);
|
||||
assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
|
||||
// AlterMasks holds operands in pairs ("CC valid" at I, "CC mask" at I + 1),
// hence the stride of two.
for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
|
||||
AlterMasks[I]->setImm(CCValues);
|
||||
unsigned CCMask = AlterMasks[I + 1]->getImm();
|
||||
// If the old mask accepted any CC value outside ReusableCCMask,
// canRestrictCCMask has already checked that it accepted all such valid
// values, so accept every non-reusable value that PrevCCSetter can
// produce.
if (CCMask & ~ReusableCCMask)
|
||||
AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
|
||||
(CCValues & ~ReusableCCMask));
|
||||
}
|
||||
|
||||
// CC is now live after PrevCCSetter.
|
||||
// NOTE(review): the two boolean arguments presumably mean isDead = false
// and Overlap = true -- confirm against the MachineInstr API.
int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false,
|
||||
true, TRI);
|
||||
assert(CCDef >= 0 && "Couldn't find CC set");
|
||||
PrevCCSetter->getOperand(CCDef).setIsDead(false);
|
||||
|
||||
// Clear any intervening kills of CC.
|
||||
MBBI = PrevCCSetter;
|
||||
for (++MBBI; MBBI != MBBE; ++MBBI)
|
||||
MBBI->clearRegisterKills(SystemZ::CC, TRI);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to fuse compare instruction Compare into a later branch. Return
|
||||
|
@ -345,6 +491,8 @@ bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) {
|
|||
// that no branches need relaxation. Return the size of the function under
|
||||
// this assumption.
|
||||
uint64_t SystemZLongBranch::initMBBInfo() {
|
||||
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
|
||||
|
||||
MF->RenumberBlocks();
|
||||
unsigned NumBlocks = MF->size();
|
||||
|
||||
|
@ -365,13 +513,20 @@ uint64_t SystemZLongBranch::initMBBInfo() {
|
|||
// Calculate the size of the fixed part of the block.
|
||||
MachineBasicBlock::iterator MI = MBB->begin();
|
||||
MachineBasicBlock::iterator End = MBB->end();
|
||||
MachineInstr *PrevCCSetter = 0;
|
||||
while (MI != End && !MI->isTerminator()) {
|
||||
MachineInstr *Current = MI;
|
||||
++MI;
|
||||
if (Current->isCompare() && fuseCompareAndBranch(Current))
|
||||
Current->removeFromParent();
|
||||
else
|
||||
Block.Size += TII->getInstSizeInBytes(Current);
|
||||
if (Current->isCompare()) {
|
||||
if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) ||
|
||||
fuseCompareAndBranch(Current)) {
|
||||
Current->removeFromParent();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (Current->modifiesRegister(SystemZ::CC, TRI))
|
||||
PrevCCSetter = Current;
|
||||
Block.Size += TII->getInstSizeInBytes(Current);
|
||||
}
|
||||
skipNonTerminators(Position, Block);
|
||||
|
||||
|
|
|
@ -0,0 +1,576 @@
|
|||
; Test that compares are omitted if CC already has the right value
|
||||
; (z10 version).
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
|
||||
|
||||
declare void @foo()
|
||||
|
||||
; Addition provides enough for equality comparisons with zero. First test
|
||||
; the EQ case.
|
||||
define i32 @f1(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: je .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
%cmp = icmp eq i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and again with NE.
|
||||
define i32 @f2(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: jne .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; SLT requires a comparison.
|
||||
define i32 @f3(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
%cmp = icmp slt i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...SLE too.
|
||||
define i32 @f4(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: cijle %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
%cmp = icmp sle i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...SGT too.
|
||||
define i32 @f5(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: cijh %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
%cmp = icmp sgt i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...SGE too.
|
||||
define i32 @f6(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: cijhe %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
%cmp = icmp sge i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Subtraction also provides enough for equality comparisons with zero.
|
||||
define i32 @f7(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: s %r2, 0(%r4)
|
||||
; CHECK-NEXT: jne .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cur = load i32 *%dest
|
||||
%res = sub i32 %a, %cur
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...but not for ordered comparisons.
|
||||
define i32 @f8(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: s %r2, 0(%r4)
|
||||
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cur = load i32 *%dest
|
||||
%res = sub i32 %a, %cur
|
||||
%cmp = icmp slt i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Logic register-register instructions also provide enough for equality
|
||||
; comparisons with zero.
|
||||
define i32 @f9(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: nr %r2, %r3
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = and i32 %a, %b
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...but not for ordered comparisons.
|
||||
define i32 @f10(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: nr %r2, %r3
|
||||
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = and i32 %a, %b
|
||||
%cmp = icmp slt i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Logic register-immediate instructions also provide enough for equality
|
||||
; comparisons with zero if the immediate covers the whole register.
|
||||
define i32 @f11(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: nilf %r2, 100
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = and i32 %a, 100
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Partial logic register-immediate instructions do not provide simple
|
||||
; zero results.
|
||||
define i32 @f12(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: nill %r2, 65436
|
||||
; CHECK-NEXT: cijlh %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = and i32 %a, -100
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; SRA provides the same CC result as a comparison with zero.
|
||||
define i32 @f13(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: sra %r2, 0(%r3)
|
||||
; CHECK-NEXT: je .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = ashr i32 %a, %b
|
||||
%cmp = icmp eq i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and again with NE.
|
||||
define i32 @f14(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: sra %r2, 0(%r3)
|
||||
; CHECK-NEXT: jlh .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = ashr i32 %a, %b
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and SLT.
|
||||
define i32 @f15(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: sra %r2, 0(%r3)
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = ashr i32 %a, %b
|
||||
%cmp = icmp slt i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and SLE.
|
||||
define i32 @f16(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: sra %r2, 0(%r3)
|
||||
; CHECK-NEXT: jle .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = ashr i32 %a, %b
|
||||
%cmp = icmp sle i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and SGT.
|
||||
define i32 @f17(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: sra %r2, 0(%r3)
|
||||
; CHECK-NEXT: jh .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = ashr i32 %a, %b
|
||||
%cmp = icmp sgt i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and SGE.
|
||||
define i32 @f18(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: sra %r2, 0(%r3)
|
||||
; CHECK-NEXT: jhe .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = ashr i32 %a, %b
|
||||
%cmp = icmp sge i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; RISBG provides the same result as a comparison against zero.
|
||||
; Test the EQ case.
|
||||
define i64 @f19(i64 %a, i64 %b, i64 *%dest) {
|
||||
; CHECK-LABEL: f19:
|
||||
; CHECK: risbg %r2, %r3, 0, 190, 0
|
||||
; CHECK-NEXT: je .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = and i64 %b, -2
|
||||
%cmp = icmp eq i64 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i64 %b, i64 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
; ...and the SLT case.
|
||||
define i64 @f20(i64 %a, i64 %b, i64 *%dest) {
|
||||
; CHECK-LABEL: f20:
|
||||
; CHECK: risbg %r2, %r3, 0, 190, 0
|
||||
; CHECK-NEXT: jl .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = and i64 %b, -2
|
||||
%cmp = icmp slt i64 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i64 %b, i64 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
; Test a case where the register we're testing is set by a non-CC-clobbering
|
||||
; instruction.
|
||||
define i32 @f21(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f21:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah %r2
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: cije %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = add i32 %a, 1000000
|
||||
%res = call i32 asm "blah $0", "=r,0" (i32 %add)
|
||||
%cmp = icmp eq i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and again with a CC-clobbering instruction.
|
||||
define i32 @f22(i32 %a, i32 %b, i32 *%dest) {
|
||||
; CHECK-LABEL: f22:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah %r2
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: cije %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = add i32 %a, 1000000
|
||||
%res = call i32 asm "blah $0", "=r,0,~{cc}" (i32 %add)
|
||||
%cmp = icmp eq i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Check that stores do not interfere.
|
||||
define i32 @f23(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) {
|
||||
; CHECK-LABEL: f23:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: st %r2, 0(%r4)
|
||||
; CHECK-NEXT: jne .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%res = add i32 %a, 1000000
|
||||
store i32 %res, i32 *%dest1
|
||||
%cmp = icmp ne i32 %res, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %b, i32 *%dest2
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Check that calls do interfere.
|
||||
define void @f24(i32 *%ptr) {
|
||||
; CHECK-LABEL: f24:
|
||||
; CHECK: afi [[REG:%r[0-9]+]], 1000000
|
||||
; CHECK-NEXT: brasl %r14, foo@PLT
|
||||
; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%val = load i32 *%ptr
|
||||
%xor = xor i32 %val, 1
|
||||
%add = add i32 %xor, 1000000
|
||||
call void @foo()
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %add, i32 *%ptr
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that inline asms don't interfere if they don't clobber CC.
|
||||
define void @f25(i32 %a, i32 *%ptr) {
|
||||
; CHECK-LABEL: f25:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: jne .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = add i32 %a, 1000000
|
||||
call void asm sideeffect "blah", "r"(i32 %add)
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %add, i32 *%ptr
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; ...but do interfere if they do clobber CC.
|
||||
define void @f26(i32 %a, i32 *%ptr) {
|
||||
; CHECK-LABEL: f26:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: blah
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: cijlh %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = add i32 %a, 1000000
|
||||
call void asm sideeffect "blah", "r,~{cc}"(i32 %add)
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %add, i32 *%ptr
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a case where CC is set based on a different register from the
|
||||
; compare input.
|
||||
define i32 @f27(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) {
|
||||
; CHECK-LABEL: f27:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: sr %r3, %r2
|
||||
; CHECK-NEXT: st %r3, 0(%r4)
|
||||
; CHECK-NEXT: cije %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%add = add i32 %a, 1000000
|
||||
%sub = sub i32 %b, %add
|
||||
store i32 %sub, i32 *%dest1
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i32 %sub, i32 *%dest2
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Make sure that we don't confuse a base register for a destination.
|
||||
define void @f28(i64 %a, i64 *%dest) {
|
||||
; CHECK-LABEL: f28:
|
||||
; CHECK: xi 0(%r2), 15
|
||||
; CHECK: cgije %r2, 0, .L{{.*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%ptr = inttoptr i64 %a to i8 *
|
||||
%val = load i8 *%ptr
|
||||
%xor = xor i8 %val, 15
|
||||
store i8 %xor, i8 *%ptr
|
||||
%cmp = icmp eq i64 %a, 0
|
||||
br i1 %cmp, label %exit, label %store
|
||||
|
||||
store:
|
||||
store i64 %a, i64 *%dest
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
; Test that compares are omitted if CC already has the right value
|
||||
; (z196 version).
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
|
||||
|
||||
; Addition provides enough for equality comparisons with zero. First test
|
||||
; the EQ case with LOC.
|
||||
define i32 @f1(i32 %a, i32 %b, i32 *%cptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: loce %r3, 0(%r4)
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
%c = load i32 *%cptr
|
||||
%arg = select i1 %cmp, i32 %c, i32 %b
|
||||
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; ...and again with STOC.
|
||||
define i32 @f2(i32 %a, i32 %b, i32 *%cptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: stoce %r3, 0(%r4)
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
%c = load i32 *%cptr
|
||||
%newval = select i1 %cmp, i32 %b, i32 %c
|
||||
store i32 %newval, i32 *%cptr
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Reverse the select order and test with LOCR.
|
||||
define i32 @f3(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: locrne %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
%arg = select i1 %cmp, i32 %b, i32 %c
|
||||
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; ...and again with LOC.
|
||||
define i32 @f4(i32 %a, i32 %b, i32 *%cptr) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: locne %r3, 0(%r4)
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
%c = load i32 *%cptr
|
||||
%arg = select i1 %cmp, i32 %b, i32 %c
|
||||
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; ...and again with STOC.
|
||||
define i32 @f5(i32 %a, i32 %b, i32 *%cptr) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: stocne %r3, 0(%r4)
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp eq i32 %add, 0
|
||||
%c = load i32 *%cptr
|
||||
%newval = select i1 %cmp, i32 %c, i32 %b
|
||||
store i32 %newval, i32 *%cptr
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Change the EQ in f3 to NE.
|
||||
define i32 @f6(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: locre %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
%arg = select i1 %cmp, i32 %b, i32 %c
|
||||
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; ...and again with LOC.
|
||||
define i32 @f7(i32 %a, i32 %b, i32 *%cptr) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: loce %r3, 0(%r4)
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
%c = load i32 *%cptr
|
||||
%arg = select i1 %cmp, i32 %b, i32 %c
|
||||
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; ...and again with STOC.
|
||||
define i32 @f8(i32 %a, i32 %b, i32 *%cptr) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: afi %r2, 1000000
|
||||
; CHECK-NEXT: stoce %r3, 0(%r4)
|
||||
; CHECK: br %r14
|
||||
%add = add i32 %a, 1000000
|
||||
%cmp = icmp ne i32 %add, 0
|
||||
%c = load i32 *%cptr
|
||||
%newval = select i1 %cmp, i32 %c, i32 %b
|
||||
store i32 %newval, i32 *%cptr
|
||||
ret i32 %add
|
||||
}
|
Loading…
Reference in New Issue