diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 59dd46c0697c..26af3f4ebcc0 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -351,6 +351,7 @@ public: void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; void PreprocessISelDAG() override; // Include the pieces autogenerated from the target description. @@ -1445,6 +1446,52 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, return true; } +// IsProfitableToFold - Returns true if is profitable to fold the specific +// operand node N of U during instruction selection that starts at Root. +bool +SystemZDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + // We want to avoid folding a LOAD into an ICMP node if as a result + // we would be forced to spill the condition code into a GPR. + if (N.getOpcode() == ISD::LOAD && U->getOpcode() == SystemZISD::ICMP) { + if (!N.hasOneUse() || !U->hasOneUse()) + return false; + + // The user of the CC value will usually be a CopyToReg into the + // physical CC register, which in turn is glued and chained to the + // actual instruction that uses the CC value. Bail out if we have + // anything else than that. + SDNode *CCUser = *U->use_begin(); + SDNode *CCRegUser = nullptr; + if (CCUser->getOpcode() == ISD::CopyToReg || + cast(CCUser->getOperand(1))->getReg() == SystemZ::CC) { + for (auto *U : CCUser->uses()) { + if (CCRegUser == nullptr) + CCRegUser = U; + else if (CCRegUser != U) + return false; + } + } + if (CCRegUser == nullptr) + return false; + + // If the actual instruction is a branch, the only thing that remains to be + // checked is whether the CCUser chain is a predecessor of the load. 
+ if (CCRegUser->isMachineOpcode() && + CCRegUser->getMachineOpcode() == SystemZ::BRC) + return !N->isPredecessorOf(CCUser->getOperand(0).getNode()); + + // Otherwise, the instruction may have multiple operands, and we need to + // verify that none of them are a predecessor of the load. This is exactly + // the same check that would be done by common code if the CC setter were + // glued to the CC user, so simply invoke that check here. + if (!IsLegalToFold(N, U, CCRegUser, OptLevel, false)) + return false; + } + + return true; +} + namespace { // Represents a sequence for extracting a 0/1 value from an IPM result: // (((X ^ XORValue) + AddValue) >> Bit) @@ -1543,9 +1590,9 @@ SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { int CCMask = CCMaskOp->getZExtValue(); SDLoc DL(Node); - SDValue Glue = Node->getOperand(4); + SDValue CCReg = Node->getOperand(4); IPMConversion IPM = getIPMConversion(CCValid, CCMask); - SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); if (IPM.XORValue) Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index bf5a28cdf4a1..ab314e1f3119 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1662,9 +1662,9 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { } } -// Emit an intrinsic with chain with a glued value instead of its CC result. -static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, - unsigned Opcode) { +// Emit an intrinsic with chain and an explicit CC register result. +static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { // Copy all operands except the intrinsic ID. 
unsigned NumOps = Op.getNumOperands(); SmallVector Ops; @@ -1674,17 +1674,17 @@ static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, Ops.push_back(Op.getOperand(I)); assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); - SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); SDValue OldChain = SDValue(Op.getNode(), 1); - SDValue NewChain = SDValue(Intr.getNode(), 0); + SDValue NewChain = SDValue(Intr.getNode(), 1); DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); - return Intr; + return Intr.getNode(); } -// Emit an intrinsic with a glued value instead of its CC result. -static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, - unsigned Opcode) { +// Emit an intrinsic with an explicit CC register result. +static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { // Copy all operands except the intrinsic ID. unsigned NumOps = Op.getNumOperands(); SmallVector Ops; @@ -1692,11 +1692,8 @@ static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, for (unsigned I = 1; I < NumOps; ++I) Ops.push_back(Op.getOperand(I)); - if (Op->getNumValues() == 1) - return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); - assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); - SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); - return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); + return Intr.getNode(); } // CC is a comparison that will be implemented using an integer or @@ -2310,29 +2307,28 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, // Emit the comparison instruction described by C. 
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (!C.Op1.getNode()) { - SDValue Op; + SDNode *Node; switch (C.Op0.getOpcode()) { case ISD::INTRINSIC_W_CHAIN: - Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); - break; + Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); + return SDValue(Node, 0); case ISD::INTRINSIC_WO_CHAIN: - Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); - break; + Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); + return SDValue(Node, Node->getNumValues() - 1); default: llvm_unreachable("Invalid comparison operands"); } - return SDValue(Op.getNode(), Op->getNumValues() - 1); } if (C.Opcode == SystemZISD::ICMP) - return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, + return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, DAG.getConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); - return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, + return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getConstant(RegisterOnly, DL, MVT::i32)); } - return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); + return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } // Implement a 32-bit *MUL_LOHI operation by extending both operands to @@ -2363,15 +2359,15 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); } -// Return an i32 value that is 1 if the CC value produced by Glue is +// Return an i32 value that is 1 if the CC value produced by CCReg is // in the mask CCMask and 0 otherwise. CC is known to have a value // in CCValid, so other values can be ignored. 
-static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue, +static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(CCValid, DL, MVT::i32), - DAG.getConstant(CCMask, DL, MVT::i32), Glue }; + DAG.getConstant(CCMask, DL, MVT::i32), CCReg }; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } @@ -2521,8 +2517,8 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); - SDValue Glue = emitCmp(DAG, DL, C); - return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); + SDValue CCReg = emitCmp(DAG, DL, C); + return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { @@ -2533,10 +2529,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); - SDValue Glue = emitCmp(DAG, DL, C); + SDValue CCReg = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -2586,9 +2582,9 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); } - SDValue Glue = emitCmp(DAG, DL, C); + SDValue CCReg = emitCmp(DAG, DL, C); SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; + DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } @@ -3454,16 +3450,16 
@@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, EVT NarrowVT = Node->getMemoryVT(); EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32; if (NarrowVT == WideVT) { - SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, DL, Tys, Ops, NarrowVT, MMO); - SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); - DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); return SDValue(); } @@ -3488,17 +3484,17 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. 
- SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); + SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); - SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); - DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); return SDValue(); } @@ -3555,12 +3551,10 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Return an i32 that contains the value of CC immediately after After, -// whose final operand must be MVT::Glue. -static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { - SDLoc DL(After); - SDValue Glue = SDValue(After, After->getNumValues() - 1); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); +// Convert condition code in CCReg to an i32 value. 
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); } @@ -3571,8 +3565,8 @@ SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, unsigned Opcode, CCValid; if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); - SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); - SDValue CC = getCCResult(DAG, Glued.getNode()); + SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, SDValue(Node, 0)); DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); return SDValue(); } @@ -3585,13 +3579,12 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode, CCValid; if (isIntrinsicWithCC(Op, Opcode, CCValid)) { - SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); - SDValue CC = getCCResult(DAG, Glued.getNode()); + SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); if (Op->getNumValues() == 1) - return CC; + return getCCResult(DAG, SDValue(Node, 0)); assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued, - CC); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), + SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); } unsigned Id = cast(Op.getOperand(0))->getZExtValue(); @@ -4828,19 +4821,19 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { SDLoc DL(N); - SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), lowerI128ToGR128(DAG, N->getOperand(2)), lowerI128ToGR128(DAG, N->getOperand(3)) }; MachineMemOperand *MMO = 
cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, DL, Tys, Ops, MVT::i128, MMO); - SDValue Success = emitSETCC(DAG, DL, Res.getValue(2), + SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); Results.push_back(lowerGR128ToI128(DAG, Res)); Results.push_back(Success); - Results.push_back(Res.getValue(1)); + Results.push_back(Res.getValue(2)); break; } default: @@ -5465,10 +5458,10 @@ SDValue SystemZTargetLowering::combineSHIFTROT( return SDValue(); } -static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) { +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code - // set by the glued instruction using the CCValid / CCMask masks, - // If the glued instruction is itself a (ICMP (SELECT_CCMASK)) testing + // set by the CCReg instruction using the CCValid / CCMask masks, + // If the CCReg instruction is itself a (ICMP (SELECT_CCMASK)) testing // the condition code set by some other instruction, see whether we // can directly use that condition code. bool Invert = false; @@ -5481,15 +5474,13 @@ static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) { else if (CCMask != SystemZ::CCMASK_CMP_EQ) return false; - // Verify that we have an ICMP that is the single user of a SELECT_CCMASK. - SDNode *ICmp = Glue.getNode(); + // Verify that we have an ICMP that is the user of a SELECT_CCMASK. + SDNode *ICmp = CCReg.getNode(); if (ICmp->getOpcode() != SystemZISD::ICMP) return false; SDNode *Select = ICmp->getOperand(0).getNode(); if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) return false; - if (!Select->hasOneUse()) - return false; // Verify that the ICMP compares against one of select values. 
auto *CompareVal = dyn_cast(ICmp->getOperand(1)); @@ -5516,25 +5507,8 @@ static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) { if (Invert) CCMask ^= CCValid; - // Return the updated Glue link. - Glue = Select->getOperand(4); - return true; -} - -static bool combineMergeChains(SDValue &Chain, SDValue Glue) { - // We are about to glue an instruction with input chain Chain to the - // instruction Glue. Verify that this would not create an invalid - // topological sort due to intervening chain nodes. - - SDNode *Node = Glue.getNode(); - for (int ResNo = Node->getNumValues() - 1; ResNo >= 0; --ResNo) - if (Node->getValueType(ResNo) == MVT::Other) { - SDValue OutChain = SDValue(Node, ResNo); - // FIXME: We should be able to at least handle an intervening - // TokenFactor node by swapping chains around a bit ... - return Chain == OutChain; - } - + // Return the updated CCReg link. + CCReg = Select->getOperand(4); return true; } @@ -5551,15 +5525,14 @@ SDValue SystemZTargetLowering::combineBR_CCMASK( int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); SDValue Chain = N->getOperand(0); - SDValue Glue = N->getOperand(4); + SDValue CCReg = N->getOperand(4); - if (combineCCMask(Glue, CCValidVal, CCMaskVal) - && combineMergeChains(Chain, Glue)) + if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), - N->getOperand(3), Glue); + N->getOperand(3), CCReg); return SDValue(); } @@ -5575,15 +5548,15 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); - SDValue Glue = N->getOperand(4); + SDValue CCReg = N->getOperand(4); - if (combineCCMask(Glue, CCValidVal, CCMaskVal)) + if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), 
N->getValueType(0), N->getOperand(0), N->getOperand(1), DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), - Glue); + CCReg); return SDValue(); } @@ -5951,6 +5924,103 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, return Reg; } +// The CC operand of MI might be missing a kill marker because there +// were multiple uses of CC, and ISel didn't know which to mark. +// Figure out whether MI should have had a kill marker. +static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { + // Scan forward through BB for a use/def of CC. + MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); + for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { + const MachineInstr& mi = *miI; + if (mi.readsRegister(SystemZ::CC)) + return false; + if (mi.definesRegister(SystemZ::CC)) + break; // Should have kill-flag - update below. + } + + // If we hit the end of the block, check whether CC is live into a + // successor. + if (miI == MBB->end()) { + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return false; + } + + return true; +} + +// Return true if it is OK for this Select pseudo-opcode to be cascaded +// together with other Select pseudo-opcodes into a single basic-block with +// a conditional jump around it. +static bool isSelectPseudo(MachineInstr &MI) { + switch (MI.getOpcode()) { + case SystemZ::Select32: + case SystemZ::Select64: + case SystemZ::SelectF32: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + case SystemZ::SelectVR32: + case SystemZ::SelectVR64: + case SystemZ::SelectVR128: + return true; + + default: + return false; + } +} + +// Helper function, which inserts PHI functions into SinkMBB: +// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], +// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects +// in [MIItBegin, MIItEnd) range. 
+static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, + MachineBasicBlock::iterator MIItEnd, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, + MachineBasicBlock *SinkMBB) { + MachineFunction *MF = TrueMBB->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + unsigned CCValid = MIItBegin->getOperand(3).getImm(); + unsigned CCMask = MIItBegin->getOperand(4).getImm(); + DebugLoc DL = MIItBegin->getDebugLoc(); + + MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); + + // As we are creating the PHIs, we have to be careful if there is more than + // one. Later Selects may reference the results of earlier Selects, but later + // PHIs have to reference the individual true/false inputs from earlier PHIs. + // That also means that PHI construction must work forward from earlier to + // later, and that the code must maintain a mapping from earlier PHI's + // destination registers, and the registers that went into the PHI. + DenseMap> RegRewriteTable; + + for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { + unsigned DestReg = MIIt->getOperand(0).getReg(); + unsigned TrueReg = MIIt->getOperand(1).getReg(); + unsigned FalseReg = MIIt->getOperand(2).getReg(); + + // If this Select we are generating is the opposite condition from + // the jump we generated, then we have to swap the operands for the + // PHI that is going to be generated. + if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) + std::swap(TrueReg, FalseReg); + + if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end()) + TrueReg = RegRewriteTable[TrueReg].first; + + if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end()) + FalseReg = RegRewriteTable[FalseReg].second; + + BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) + .addReg(TrueReg).addMBB(TrueMBB) + .addReg(FalseReg).addMBB(FalseMBB); + + // Add this PHI to the rewrite table. 
+ RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); + } +} + // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, @@ -5958,17 +6028,37 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned TrueReg = MI.getOperand(1).getReg(); - unsigned FalseReg = MI.getOperand(2).getReg(); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); DebugLoc DL = MI.getDebugLoc(); + // If we have a sequence of Select* pseudo instructions using the + // same condition code value, we want to expand all of them into + // a single pair of basic blocks using the same condition. + MachineInstr *LastMI = &MI; + MachineBasicBlock::iterator NextMIIt = + std::next(MachineBasicBlock::iterator(MI)); + + if (isSelectPseudo(MI)) + while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) && + NextMIIt->getOperand(3).getImm() == CCValid && + (NextMIIt->getOperand(4).getImm() == CCMask || + NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) { + LastMI = &*NextMIIt; + ++NextMIIt; + } + MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + // Unless CC was killed in the last Select instruction, mark it as + // live-in to both FalseMBB and JoinMBB. + if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) { + FalseMBB->addLiveIn(SystemZ::CC); + JoinMBB->addLiveIn(SystemZ::CC); + } + // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB @@ -5987,11 +6077,12 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... 
MBB = JoinMBB; - BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) - .addReg(TrueReg).addMBB(StartMBB) - .addReg(FalseReg).addMBB(FalseMBB); + MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); + MachineBasicBlock::iterator MIItEnd = + std::next(MachineBasicBlock::iterator(LastMI)); + createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB); - MI.eraseFromParent(); + StartMBB->erase(MIItBegin, MIItEnd); return JoinMBB; } @@ -6053,6 +6144,13 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + // Unless CC was killed in the CondStore instruction, mark it as + // live-in to both FalseMBB and JoinMBB. + if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) { + FalseMBB->addLiveIn(SystemZ::CC); + JoinMBB->addLiveIn(SystemZ::CC); + } + // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index c0d19972cdb4..16a8291aba6e 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -142,11 +142,11 @@ enum NodeType : unsigned { // Transaction begin. The first operand is the chain, the second // the TDB pointer, and the third the immediate control field. - // Returns chain and glue. + // Returns CC value and chain. TBEGIN, TBEGIN_NOFLOAT, - // Transaction end. Just the chain operand. Returns chain and glue. + // Transaction end. Just the chain operand. Returns CC value and chain. TEND, // Create a vector constant by filling byte N of the result with bit @@ -308,8 +308,8 @@ enum NodeType : unsigned { // Operand 5: the width of the field in bits (8 or 16) ATOMIC_CMP_SWAPW, - // Atomic compare-and-swap returning glue (condition code). 
- // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + // Atomic compare-and-swap returning CC value. + // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) ATOMIC_CMP_SWAP, // 128-bit atomic load. @@ -321,7 +321,7 @@ enum NodeType : unsigned { ATOMIC_STORE_128, // 128-bit atomic compare-and-swap. - // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) ATOMIC_CMP_SWAP_128, // Byte swapping load. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 8c990ecc03e9..4e7d665ae9ee 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -3690,7 +3690,7 @@ class CompareRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR { + [(set CC, (operator cls1:$R1, cls2:$R2))]> { let OpKey = mnemonic#cls1; let OpType = "reg"; let isCompare = 1; @@ -3700,7 +3700,7 @@ class CompareRRE opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE { + [(set CC, (operator cls1:$R1, cls2:$R2))]> { let OpKey = mnemonic#cls1; let OpType = "reg"; let isCompare = 1; @@ -3710,7 +3710,7 @@ class CompareRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa { + [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; } @@ -3718,7 +3718,7 @@ class CompareRIL opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRILa { + [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; } @@ -3726,7 +3726,7 @@ class CompareRILPC opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> : InstRILb { + [(set CC, (operator cls:$R1, (load pcrel32:$RI2)))]> { let isCompare = 1; let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
@@ -3740,7 +3740,7 @@ class CompareRX opcode, SDPatternOperator operator, AddressingMode mode = bdxaddr12only> : InstRXa { + [(set CC, (operator cls:$R1, (load mode:$XBD2)))]> { let OpKey = mnemonic#"r"#cls; let OpType = "mem"; let isCompare = 1; @@ -3752,7 +3752,7 @@ class CompareRXE opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXE { + [(set CC, (operator cls:$R1, (load bdxaddr12only:$XBD2)))]> { let OpKey = mnemonic#"r"#cls; let OpType = "mem"; let isCompare = 1; @@ -3766,7 +3766,7 @@ class CompareRXY opcode, SDPatternOperator operator, AddressingMode mode = bdxaddr20only> : InstRXYa { + [(set CC, (operator cls:$R1, (load mode:$XBD2)))]> { let OpKey = mnemonic#"r"#cls; let OpType = "mem"; let isCompare = 1; @@ -3826,7 +3826,7 @@ class CompareSI opcode, SDPatternOperator operator, AddressingMode mode = bdaddr12only> : InstSI { + [(set CC, (operator (load mode:$BD1), imm:$I2))]> { let isCompare = 1; let mayLoad = 1; } @@ -3835,7 +3835,7 @@ class CompareSIL opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> : InstSIL { + [(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> { let isCompare = 1; let mayLoad = 1; } @@ -3845,7 +3845,7 @@ class CompareSIY opcode, SDPatternOperator operator, AddressingMode mode = bdaddr20only> : InstSIY { + [(set CC, (operator (load mode:$BD1), imm:$I2))]> { let isCompare = 1; let mayLoad = 1; } @@ -3866,7 +3866,7 @@ class CompareVRRa opcode, SDPatternOperator operator, TypedReg tr, bits<4> type> : InstVRRa { + [(set CC, (operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2)))]> { let isCompare = 1; let M3 = type; let M4 = 0; @@ -3895,14 +3895,26 @@ class CompareVRRh opcode> let isCompare = 1; } +class TestInherentS opcode, + SDPatternOperator operator> + : InstS { + let BD2 = 0; +} + class TestRXE opcode, SDPatternOperator operator, RegisterOperand cls> : InstRXE { + [(set CC, (operator cls:$R1, bdxaddr12only:$XBD2))]> { let M3 = 0; } +class TestBinarySIL 
opcode, + SDPatternOperator operator, Immediate imm> + : InstSIL; + class TestRSL opcode> : InstRSLa { @@ -4529,11 +4541,6 @@ class Pseudo pattern> let isCodeGenOnly = 1; } -// Like SideEffectBinarySIL, but expanded later. -class SideEffectBinarySILPseudo - : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), - [(operator bdaddr12only:$BD1, imm:$I2)]>; - // Like UnaryRI, but expanded after RA depending on the choice of register. class UnaryRIPseudo @@ -4593,7 +4600,8 @@ multiclass BinaryRIAndKPseudo - : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> { + : Pseudo<(outs), (ins cls:$R1, imm:$I2), + [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; } @@ -4602,13 +4610,18 @@ class CompareRXYPseudo bytes, AddressingMode mode = bdxaddr20only> : Pseudo<(outs), (ins cls:$R1, mode:$XBD2), - [(operator cls:$R1, (load mode:$XBD2))]> { + [(set CC, (operator cls:$R1, (load mode:$XBD2)))]> { let mayLoad = 1; let Has20BitOffset = 1; let HasIndex = 1; let AccessBytes = bytes; } +// Like TestBinarySIL, but expanded later. +class TestBinarySILPseudo + : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), + [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; + // Like CondBinaryRRF, but expanded after RA depending on the choice of // register. class CondBinaryRRFPseudo @@ -4690,17 +4703,13 @@ class SelectWrapper imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; let hasNoSchedulingInfo = 1; - // Although the instructions used by these nodes do not in themselves - // change CC, the insertion requires new blocks, and CC cannot be live - // across them. - let Defs = [CC]; let Uses = [CC]; } // Stores $new to $addr if $cc is true ("" case) or false (Inv case). 
multiclass CondStores { - let Defs = [CC], Uses = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1, + let Uses = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1, mayLoad = 1, mayStore = 1 in { def "" : Pseudo<(outs), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), @@ -4782,6 +4791,22 @@ multiclass MemorySS opcode, } } +// The same, but setting a CC result as comparison operator. +multiclass CompareMemorySS opcode, + SDPatternOperator sequence, SDPatternOperator loop> { + def "" : SideEffectBinarySSa; + let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { + def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length))]>; + def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256), + [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256))]>; + } +} + // Define an instruction that operates on two strings, both terminated // by the character in R0. The instruction processes a CPU-determinated // number of bytes at a time and sets CC to 3 if the instruction needs @@ -4851,7 +4876,8 @@ class BinaryAliasVRRf // An alias of a CompareRI, but with different register sizes. class CompareAliasRI - : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> { + : Alias<4, (outs), (ins cls:$R1, imm:$I2), + [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 572446c1aa12..c12d02b1793d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -906,6 +906,23 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Move CC value from/to a GR32. 
+ if (SrcReg == SystemZ::CC) { + auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg); + if (KillSrc) { + const MachineFunction *MF = MBB.getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + MIB->addRegisterKilled(SrcReg, TRI); + } + return; + } + if (DestReg == SystemZ::CC) { + BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH)) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(3 << (SystemZ::IPM_CC - 16)); + return; + } + // Everything else needs only one instruction. unsigned Opcode; if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index d3ddb64d396d..b5dd1c663357 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1493,7 +1493,7 @@ defm : ZXB; // Memory-to-memory comparison. let mayLoad = 1, Defs = [CC] in { - defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; + defm CLC : CompareMemorySS<"clc", 0xD5, z_clc, z_clc_loop>; def CLCL : SideEffectBinaryMemMemRR<"clcl", 0x0F, GR128, GR128>; def CLCLE : SideEffectTernaryMemMemRS<"clcle", 0xA9, GR128, GR128>; def CLCLU : SideEffectTernaryMemMemRSY<"clclu", 0xEB8F, GR128, GR128>; @@ -1934,16 +1934,16 @@ let isCall = 1, Defs = [CC] in let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in { // Transaction Begin let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in { - def TBEGIN : SideEffectBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>; + def TBEGIN : TestBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>; let hasNoSchedulingInfo = 1 in - def TBEGIN_nofloat : SideEffectBinarySILPseudo; + def TBEGIN_nofloat : TestBinarySILPseudo; def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561, int_s390_tbeginc, imm32zx16>; } // Transaction End let Defs = [CC] in - def TEND : SideEffectInherentS<"tend", 0xB2F8, z_tend>; + def TEND : TestInherentS<"tend", 0xB2F8, z_tend>; // Transaction Abort let isTerminator 
= 1, isBarrier = 1, mayStore = 1, diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index d067f331f677..6bec1e0200bd 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -15,19 +15,24 @@ def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>, def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, SDTCisVT<1, i64>]>; def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ZICmp : SDTypeProfile<0, 3, - [SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def SDT_ZBRCCMask : SDTypeProfile<0, 3, +def SDT_ZCmp : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>]>; +def SDT_ZICmp : SDTypeProfile<1, 3, + [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>]>; +def SDT_ZBRCCMask : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, OtherVT>]>; -def SDT_ZSelectCCMask : SDTypeProfile<1, 4, + SDTCisVT<2, OtherVT>, + SDTCisVT<3, i32>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>, - SDTCisVT<4, i32>]>; + SDTCisVT<4, i32>, + SDTCisVT<5, i32>]>; def SDT_ZWrapPtr : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -47,45 +52,67 @@ def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, SDTCisVT<3, i32>, SDTCisVT<4, i32>, SDTCisVT<5, i32>]>; -def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, +def SDT_ZAtomicCmpSwapW : SDTypeProfile<2, 6, [SDTCisVT<0, i32>, - SDTCisPtrTy<1>, - SDTCisVT<2, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, SDTCisVT<3, i32>, SDTCisVT<4, i32>, SDTCisVT<5, i32>, - SDTCisVT<6, i32>]>; -def SDT_ZAtomicCmpSwap : SDTypeProfile<1, 3, + SDTCisVT<6, i32>, + SDTCisVT<7, i32>]>; +def SDT_ZAtomicCmpSwap : SDTypeProfile<2, 3, [SDTCisInt<0>, - SDTCisPtrTy<1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>; + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>]>; def SDT_ZAtomicLoad128 
: SDTypeProfile<1, 1, [SDTCisVT<0, untyped>, SDTCisPtrTy<1>]>; def SDT_ZAtomicStore128 : SDTypeProfile<0, 2, [SDTCisVT<0, untyped>, SDTCisPtrTy<1>]>; -def SDT_ZAtomicCmpSwap128 : SDTypeProfile<1, 3, +def SDT_ZAtomicCmpSwap128 : SDTypeProfile<2, 3, [SDTCisVT<0, untyped>, - SDTCisPtrTy<1>, - SDTCisVT<2, untyped>, - SDTCisVT<3, untyped>]>; + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisVT<3, untyped>, + SDTCisVT<4, untyped>]>; def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i64>]>; +def SDT_ZMemMemLengthCC : SDTypeProfile<1, 3, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i64>]>; def SDT_ZMemMemLoop : SDTypeProfile<0, 4, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; +def SDT_ZMemMemLoopCC : SDTypeProfile<1, 4, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i64>, + SDTCisVT<4, i64>]>; def SDT_ZString : SDTypeProfile<1, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVT<3, i32>]>; -def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_ZStringCC : SDTypeProfile<2, 3, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>, + SDTCisPtrTy<2>, + SDTCisPtrTy<3>, + SDTCisVT<4, i32>]>; +def SDT_ZIPM : SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; @@ -97,9 +124,12 @@ def SDT_ZStoreBSwap : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; -def SDT_ZTBegin : SDTypeProfile<0, 2, - [SDTCisPtrTy<0>, - SDTCisVT<1, i32>]>; +def SDT_ZTBegin : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>]>; +def SDT_ZTEnd : SDTypeProfile<1, 0, + [SDTCisVT<0, i32>]>; def SDT_ZInsertVectorElt : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -115,10 +145,19 @@ def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, def SDT_ZVecUnary : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; +def SDT_ZVecUnaryCC : SDTypeProfile<2, 1, 
+ [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>]>; def SDT_ZVecBinary : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; +def SDT_ZVecBinaryCC : SDTypeProfile<2, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 2>]>; def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -127,10 +166,16 @@ def SDT_ZVecBinaryConv : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>; -def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2, +def SDT_ZVecBinaryConvCC : SDTypeProfile<2, 2, [SDTCisVec<0>, - SDTCisVec<1>, - SDTCisVT<2, i32>]>; + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisSameAs<2, 3>]>; +def SDT_ZVecBinaryConvIntCC : SDTypeProfile<2, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisVT<3, i32>]>; def SDT_ZRotateMask : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>, @@ -149,13 +194,28 @@ def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; +def SDT_ZVecTernaryIntCC : SDTypeProfile<2, 3, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<4, i32>]>; -def SDT_ZTest : SDTypeProfile<0, 2, [SDTCisVT<1, i64>]>; +def SDT_ZVecQuaternaryIntCC : SDTypeProfile<2, 4, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisVT<5, i32>]>; +def SDT_ZTest : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, + SDTCisVT<2, i64>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -188,13 +248,14 @@ def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>; -def z_icmp : 
SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>; -def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>; -def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>; -def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, - [SDNPHasChain, SDNPInGlue]>; -def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, - [SDNPInGlue]>; +def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; +def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; +def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; +def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, + [SDNPHasChain]>; +def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK", + SDT_ZSelectCCMask>; +def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; @@ -210,7 +271,7 @@ def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap, def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest, [SDNPOutGlue]>; +def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>; // Defined because the index is an i32 rather than a pointer. 
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", @@ -229,10 +290,8 @@ def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", SDT_ZVecTernaryInt>; def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; -def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv, - [SDNPOutGlue]>; -def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv, - [SDNPOutGlue]>; +def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConvCC>; +def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConvCC>; def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; @@ -247,44 +306,30 @@ def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; -def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary, - [SDNPOutGlue]>; -def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary, - [SDNPOutGlue]>; -def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary, - [SDNPOutGlue]>; +def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>; +def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>; +def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>; def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; -def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv, - [SDNPOutGlue]>; -def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv, - [SDNPOutGlue]>; -def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv, - [SDNPOutGlue]>; +def 
z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; +def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; +def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; -def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>; -def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt, - [SDNPOutGlue]>; -def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt, - [SDNPOutGlue]>; -def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary, - [SDNPOutGlue]>; -def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary, - [SDNPOutGlue]>; -def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary, - [SDNPOutGlue]>; -def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary, - [SDNPOutGlue]>; -def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary, - [SDNPOutGlue]>; -def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt, - [SDNPOutGlue]>; +def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; +def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; +def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>; +def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinaryCC>; +def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinaryCC>; +def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinaryCC>; +def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinaryCC>; +def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnaryCC>; +def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", + SDT_ZVecQuaternaryIntCC>; def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", - SDT_ZVecQuaternaryInt, [SDNPOutGlue]>; -def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt, - [SDNPOutGlue]>; + SDT_ZVecQuaternaryIntCC>; +def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>; class AtomicWOp : 
SDNode<"SystemZISD::"##name, profile, @@ -305,11 +350,11 @@ def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; def z_atomic_cmp_swap : SDNode<"SystemZISD::ATOMIC_CMP_SWAP", SDT_ZAtomicCmpSwap, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, - SDNPOutGlue, SDNPMemOperand]>; + SDNPMemOperand]>; def z_atomic_cmp_swapw : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, - SDNPOutGlue, SDNPMemOperand]>; + SDNPMemOperand]>; def z_atomic_load_128 : SDNode<"SystemZISD::ATOMIC_LOAD_128", SDT_ZAtomicLoad128, @@ -320,7 +365,7 @@ def z_atomic_store_128 : SDNode<"SystemZISD::ATOMIC_STORE_128", def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128", SDT_ZAtomicCmpSwap128, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, - SDNPOutGlue, SDNPMemOperand]>; + SDNPMemOperand]>; def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; @@ -338,30 +383,26 @@ def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; -def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; -def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; -def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC, + [SDNPHasChain, SDNPMayLoad]>; +def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoopCC, + [SDNPHasChain, SDNPMayLoad]>; +def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC, + [SDNPHasChain, SDNPMayLoad]>; def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; -def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; -def z_ipm : 
SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, - [SDNPInGlue]>; +def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZStringCC, + [SDNPHasChain, SDNPMayLoad]>; def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, - [SDNPHasChain, SDNPOutGlue, SDNPMayStore, - SDNPSideEffect]>; + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, - [SDNPHasChain, SDNPOutGlue, SDNPMayStore, - SDNPSideEffect]>; -def z_tend : SDNode<"SystemZISD::TEND", SDTNone, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDT_ZTEnd, + [SDNPHasChain, SDNPSideEffect]>; def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>; def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>; @@ -382,6 +423,16 @@ def z_strv : PatFrag<(ops node:$src, node:$addr), def z_strvg : PatFrag<(ops node:$src, node:$addr), (z_storebswap node:$src, node:$addr, i64)>; +// Fragments including CC as an implicit source. +def z_br_ccmask + : PatFrag<(ops node:$valid, node:$mask, node:$bb), + (z_br_ccmask_1 node:$valid, node:$mask, node:$bb, CC)>; +def z_select_ccmask + : PatFrag<(ops node:$true, node:$false, node:$valid, node:$mask), + (z_select_ccmask_1 node:$true, node:$false, + node:$valid, node:$mask, CC)>; +def z_ipm : PatFrag<(ops), (z_ipm_1 CC)>; + // Signed and unsigned comparisons. 
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ unsigned Type = cast(N->getOperand(2))->getZExtValue(); diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 91c7d1f6e85e..76ed6f80ba55 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -315,3 +315,11 @@ SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const SystemZFrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; } + +const TargetRegisterClass * +SystemZRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &SystemZ::CCRRegClass) + return &SystemZ::GR32BitRegClass; + return RC; +} + diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 8787a90b1e25..d64f77662012 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -44,6 +44,12 @@ public: return &SystemZ::ADDR64BitRegClass; } + /// getCrossCopyRegClass - Returns a legal register class to copy a register + /// in the specified class to or from. Returns RC itself if it is possible to + /// copy directly between two registers of the specified class. + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + bool getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index a183d7839a6e..79ba7534f92c 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -296,8 +296,8 @@ def v128any : TypedReg; // The 2-bit condition code field of the PSW. Every register named in an // inline asm needs a class associated with it. 
def CC : SystemZReg<"cc">; -let isAllocatable = 0 in - def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; +let isAllocatable = 0, CopyCost = -1 in + def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>; // Access registers. class ACR32 num, string n> : SystemZReg { diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 657482504045..e0d7bca9a94b 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -145,7 +145,7 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( // deciding whether to use a loop or straight-line code. static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, uint64_t Size) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); EVT PtrVT = Src1.getValueType(); // A two-CLC sequence is a clear win over a loop, not least because it // needs only one branch. A three-CLC sequence needs the same number @@ -167,9 +167,9 @@ static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, // less than zero if CC == 1 and greater than zero if CC >= 2. // The sequence starts with IPM, which puts CC into bits 29 and 28 // of an integer and clears bits 30 and 31. 
-static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue, +static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg, SelectionDAG &DAG) { - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, @@ -184,9 +184,9 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( if (auto *CSize = dyn_cast(Size)) { uint64_t Bytes = CSize->getZExtValue(); assert(Bytes > 0 && "Caller should have handled 0-size case"); - Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); - SDValue Glue = Chain.getValue(1); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + SDValue CCReg = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + Chain = CCReg.getValue(1); + return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); } return std::make_pair(SDValue(), SDValue()); } @@ -196,7 +196,7 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const { // Use SRST to find the character. End is its address on success. EVT PtrVT = Src.getValueType(); - SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::i32, MVT::Other); Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, @@ -204,17 +204,16 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, Limit, Src, Char); - Chain = End.getValue(1); - SDValue Glue = End.getValue(2); + SDValue CCReg = End.getValue(1); + Chain = End.getValue(2); // Now select between End and null, depending on whether the character // was found. 
SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), - Glue}; - VTs = DAG.getVTList(PtrVT, MVT::Glue); - End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); + CCReg}; + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops); return std::make_pair(End, Chain); } @@ -232,12 +231,12 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const { - SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); + SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other); SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, DAG.getConstant(0, DL, MVT::i32)); - Chain = Unused.getValue(1); - SDValue Glue = Chain.getValue(2); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + SDValue CCReg = Unused.getValue(1); + Chain = Unused.getValue(2); + return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); } // Search from Src for a null character, stopping once Src reaches Limit. 
@@ -250,10 +249,10 @@ static std::pair getBoundedStrlen(SelectionDAG &DAG, SDValue Chain, SDValue Src, SDValue Limit) { EVT PtrVT = Src.getValueType(); - SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::i32, MVT::Other); SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, Limit, Src, DAG.getConstant(0, DL, MVT::i32)); - Chain = End.getValue(1); + Chain = End.getValue(2); SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); return std::make_pair(Len, Chain); } diff --git a/llvm/test/CodeGen/SystemZ/cond-move-02.ll b/llvm/test/CodeGen/SystemZ/cond-move-02.ll index 2e2bacd2532d..ea0ef98335ce 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-02.ll +++ b/llvm/test/CodeGen/SystemZ/cond-move-02.ll @@ -4,9 +4,9 @@ define i32 @f1(i32 %x) { ; CHECK-LABEL: f1: -; CHECK: lhi [[REG:%r[0-5]]], 0 ; CHECK: chi %r2, 0 -; CHECK: lochilh [[REG]], 42 +; CHECK: lhi %r2, 0 +; CHECK: lochilh %r2, 42 ; CHECK: br %r14 %cond = icmp ne i32 %x, 0 %res = select i1 %cond, i32 42, i32 0 @@ -35,9 +35,9 @@ define i32 @f3(i32 %x, i32 %y) { define i64 @f4(i64 %x) { ; CHECK-LABEL: f4: -; CHECK: lghi [[REG:%r[0-5]]], 0 ; CHECK: cghi %r2, 0 -; CHECK: locghilh [[REG]], 42 +; CHECK: lghi %r2, 0 +; CHECK: locghilh %r2, 42 ; CHECK: br %r14 %cond = icmp ne i64 %x, 0 %res = select i1 %cond, i64 42, i64 0 diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-48.ll b/llvm/test/CodeGen/SystemZ/int-cmp-48.ll index 821283d19d0c..c68a67357648 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-48.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-48.ll @@ -28,9 +28,8 @@ exit: ; Check that we do not fold across an aliasing store. 
define void @f2(i8 *%src) { ; CHECK-LABEL: f2: -; CHECK: llc [[REG:%r[0-5]]], 0(%r2) -; CHECK-DAG: mvi 0(%r2), 0 -; CHECK-DAG: tmll [[REG]], 1 +; CHECK: tm 0(%r2), 1 +; CHECK: mvi 0(%r2), 0 ; CHECK: ber %r14 ; CHECK: br %r14 entry: diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-55.ll b/llvm/test/CodeGen/SystemZ/int-cmp-55.ll new file mode 100644 index 000000000000..5028784197d9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-55.ll @@ -0,0 +1,19 @@ +; Check that we don't insert unnecessary CC spills +; +; RUN: llc < %s -mtriple=s390x-linux-gnu + +declare signext i32 @f() + +define signext i32 @test(i32* %ptr) { +; CHECK-NOT: ipm + +entry: + %0 = load i32, i32* %ptr, align 4 + %tobool = icmp eq i32 %0, 0 + %call = tail call signext i32 @f() + %1 = icmp slt i32 %call, 40 + %2 = or i1 %tobool, %1 + %retv = select i1 %2, i32 %call, i32 40 + ret i32 %retv +} + diff --git a/llvm/test/CodeGen/SystemZ/multiselect.ll b/llvm/test/CodeGen/SystemZ/multiselect.ll new file mode 100644 index 000000000000..d2078ff291ba --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/multiselect.ll @@ -0,0 +1,21 @@ +; Test that multiple select statements using the same condition are expanded +; into a single conditional branch. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @test(i32 signext %positive, double %base, double %offset, double* %rmin, double* %rmax) { +entry: +; CHECK: cijlh %r2, 0, +; CHECK-NOT: cij +; CHECK-NOT: je +; CHECK-NOT: jlh + + %tobool = icmp eq i32 %positive, 0 + %add = fadd double %base, %offset + %min = select i1 %tobool, double %add, double %base + %max = select i1 %tobool, double %base, double %add + store double %min, double* %rmin, align 8 + store double %max, double* %rmax, align 8 + ret void +} +