forked from OSchip/llvm-project
Compile this:

void foo(float a, int *b) { *b = a; }

to this:

_foo:
        fctiwz f0, f1
        stfiwx f0, 0, r4
        blr

instead of this:

_foo:
        fctiwz f0, f1
        stfd f0, -8(r1)
        lwz r2, -4(r1)
        stw r2, 0(r4)
        blr

This implements CodeGen/PowerPC/stfiwx.ll, and also incidentally does the right thing for GCC bugzilla 26505.

llvm-svn: 26447
This commit is contained in:
parent
160cc92461
commit
27f5345b1f
|
@ -187,6 +187,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
|
|||
|
||||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
|
||||
computeRegisterProperties();
|
||||
}
|
||||
|
@ -198,6 +199,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::FCFID: return "PPCISD::FCFID";
|
||||
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
|
||||
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
|
||||
case PPCISD::STFIWX: return "PPCISD::STFIWX";
|
||||
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
|
||||
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
|
||||
case PPCISD::Hi: return "PPCISD::Hi";
|
||||
|
@ -1032,6 +1034,25 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
}
|
||||
}
|
||||
break;
|
||||
case ISD::STORE:
|
||||
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
|
||||
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
|
||||
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
|
||||
N->getOperand(1).getValueType() == MVT::i32) {
|
||||
SDOperand Val = N->getOperand(1).getOperand(0);
|
||||
if (Val.getValueType() == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
}
|
||||
Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
|
||||
Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
|
||||
N->getOperand(2), N->getOperand(3));
|
||||
DCI.AddToWorklist(Val.Val);
|
||||
return Val;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return SDOperand();
|
||||
|
|
|
@ -39,6 +39,11 @@ namespace llvm {
|
|||
/// of that FP value.
|
||||
FCTIDZ, FCTIWZ,
|
||||
|
||||
/// STFIWX - The STFIWX instruction. The first operand is an input token
|
||||
/// chain, then an f64 value to store, then an address to store it to,
|
||||
/// then a SRCVALUE for the address.
|
||||
STFIWX,
|
||||
|
||||
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
|
||||
// three v4f32 operands and producing a v4f32 result.
|
||||
VMADDFP, VNMSUBFP,
|
||||
|
|
|
@ -14,6 +14,18 @@
|
|||
|
||||
include "PPCInstrFormats.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC specific type constraints.
|
||||
//
|
||||
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
|
||||
SDTCisVT<0, f64>, SDTCisPtrTy<1>
|
||||
]>;
|
||||
def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl
|
||||
SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
|
||||
def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC specific DAG Nodes.
|
||||
//
|
||||
|
@ -21,6 +33,7 @@ include "PPCInstrFormats.td"
|
|||
def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
|
||||
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
|
||||
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
|
||||
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain]>;
|
||||
|
||||
def PPCfsel : SDNode<"PPCISD::FSEL",
|
||||
// Type constraint for fsel.
|
||||
|
@ -34,19 +47,14 @@ def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
|
|||
|
||||
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
|
||||
// amounts. These nodes are generated by the multi-precision shift code.
|
||||
def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl
|
||||
SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
def PPCsrl : SDNode<"PPCISD::SRL" , SDT_PPCShiftOp>;
|
||||
def PPCsra : SDNode<"PPCISD::SRA" , SDT_PPCShiftOp>;
|
||||
def PPCshl : SDNode<"PPCISD::SHL" , SDT_PPCShiftOp>;
|
||||
|
||||
// These are target-independent nodes, but have target-specific formats.
|
||||
def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
|
||||
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeq,[SDNPHasChain]>;
|
||||
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeq,[SDNPHasChain]>;
|
||||
|
||||
def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
|
||||
def retflag : SDNode<"PPCISD::RET_FLAG", SDT_PPCRetFlag,
|
||||
[SDNPHasChain, SDNPOptInFlag]>;
|
||||
|
||||
|
@ -636,9 +644,9 @@ def FNEGD : XForm_26<63, 40, (ops F8RC:$frD, F8RC:$frB),
|
|||
|
||||
|
||||
let isStore = 1, noResults = 1 in {
|
||||
def STFIWX: XForm_28<31, 983, (ops F4RC:$frS, memrr:$dst),
|
||||
def STFIWX: XForm_28<31, 983, (ops F8RC:$frS, memrr:$dst),
|
||||
"stfiwx $frS, $dst", LdStUX,
|
||||
[]>;
|
||||
[(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
|
||||
def STFSX : XForm_28<31, 663, (ops F4RC:$frS, memrr:$dst),
|
||||
"stfsx $frS, $dst", LdStUX,
|
||||
[(store F4RC:$frS, xaddr:$dst)]>;
|
||||
|
|
|
@ -77,6 +77,7 @@ PPCSubtarget::PPCSubtarget(const Module &M, const std::string &FS)
|
|||
, Has64BitRegs(false)
|
||||
, HasAltivec(false)
|
||||
, HasFSQRT(false)
|
||||
, HasSTFIWX(false)
|
||||
, IsAIX(false)
|
||||
, IsDarwin(false) {
|
||||
|
||||
|
|
|
@ -5,12 +5,6 @@ TODO:
|
|||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Use the stfiwx instruction for:
|
||||
|
||||
void foo(float a, int *b) { *b = a; }
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Support 'update' load/store instructions. These are cracked on the G5, but are
|
||||
still a codesize win.
|
||||
|
||||
|
|
Loading…
Reference in New Issue