forked from OSchip/llvm-project
Eliminate redundant CR moves on PPC32.
The 32-bit ABI requires CR bit 6 to be set if a vararg call has fp arguments and unset if it doesn't. The solution up to now was to insert a MachineNode to set/unset the CR bit, which produces a CR vreg. This vreg was then copied into CR bit 6. When the register allocator saw a bunch of these in the same function, it allocated the set/unset CR bit in some random CR register (1 extra instruction) and then emitted CR moves before every vararg function call, rather than just setting or unsetting CR bit 6 directly before every vararg function call. This patch instead inserts a PPCcr6set/PPCcr6unset node, which is then matched by a dedicated instruction pattern. Patch by Tobias von Koch. llvm-svn: 162725
This commit is contained in:
parent
e39526a789
commit
5ab378037f
|
@ -517,6 +517,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
|
||||
case PPCISD::MTFSF: return "PPCISD::MTFSF";
|
||||
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
|
||||
case PPCISD::CR6SET: return "PPCISD::CR6SET";
|
||||
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2834,6 +2836,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
|
|||
isTailCall, RegsToPass, Ops, NodeTys,
|
||||
PPCSubTarget);
|
||||
|
||||
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
|
||||
if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
|
||||
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
|
||||
|
||||
// When performing tail call optimization the callee pops its arguments off
|
||||
// the stack. Account for this here so these bytes can be pushed back on in
|
||||
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
|
||||
|
@ -3131,14 +3137,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
|
|||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
&MemOpChains[0], MemOpChains.size());
|
||||
|
||||
// Set CR6 to true if this is a vararg call with floating args passed in
|
||||
// registers.
|
||||
if (isVarArg) {
|
||||
SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
|
||||
dl, MVT::i32), 0);
|
||||
RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
|
||||
}
|
||||
|
||||
// Build a sequence of copy-to-reg nodes chained together with token chain
|
||||
// and flag operands which copy the outgoing args into the appropriate regs.
|
||||
SDValue InFlag;
|
||||
|
@ -3148,6 +3146,14 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
|
|||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
// Set CR bit 6 to true if this is a vararg call with floating args passed in
|
||||
// registers.
|
||||
if (isVarArg) {
|
||||
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
|
||||
dl, DAG.getVTList(MVT::Other, MVT::Glue), Chain);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
if (isTailCall)
|
||||
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
|
||||
false, TailCallArguments);
|
||||
|
|
|
@ -174,6 +174,10 @@ namespace llvm {
|
|||
/// operand #3 optional in flag
|
||||
TC_RETURN,
|
||||
|
||||
/// ch, gl = CR6[UN]SET ch, inglue - Set (CR6SET) or clear (CR6UNSET) CR bit 6 for SVR4 vararg calls
|
||||
CR6SET,
|
||||
CR6UNSET,
|
||||
|
||||
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
|
||||
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
|
||||
|
|
|
@ -155,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
|
|||
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
// Instructions to set/unset CR bit 6 for SVR4 vararg calls
|
||||
def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
// Instructions to support atomic operations
|
||||
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
|
@ -1145,6 +1151,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
|
|||
"crxor $dst, $dst, $dst", BrCR,
|
||||
[]>;
|
||||
|
||||
let Defs = [CR1EQ], CRD = 6 in {
|
||||
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
|
||||
"creqv 6, 6, 6", BrCR,
|
||||
[(PPCcr6set)]>;
|
||||
|
||||
def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
|
||||
"crxor 6, 6, 6", BrCR,
|
||||
[(PPCcr6unset)]>;
|
||||
}
|
||||
|
||||
// XFX-Form instructions. Instructions that deal with SPRs.
|
||||
//
|
||||
let Uses = [CTR] in {
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
|
||||
target triple = "powerpc-unknown-linux"
|
||||
|
||||
@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1
|
||||
|
||||
define void @test(i32 %count) nounwind {
|
||||
entry:
|
||||
; CHECK: crxor 6, 6, 6
|
||||
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
|
||||
%cmp2 = icmp sgt i32 %count, 0
|
||||
br i1 %cmp2, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||
; CHECK: crxor 6, 6, 6
|
||||
%call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
|
||||
%inc = add nsw i32 %i.03, 1
|
||||
%exitcond = icmp eq i32 %inc, %count
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @printf(i8* nocapture, ...) nounwind
|
Loading…
Reference in New Issue