forked from OSchip/llvm-project
Add the PPC lfiwax instruction
This instruction is available on modern PPC64 CPUs and is now used to improve the SINT_TO_FP lowering: it eliminates the need for a separate sign-extension instruction and reduces the amount of stack space needed. llvm-svn: 178446
This commit is contained in:
parent
e53429a13e
commit
beb296bea1
|
@ -59,6 +59,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
|
||||||
"Enable the fsqrt instruction">;
|
"Enable the fsqrt instruction">;
|
||||||
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
|
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
|
||||||
"Enable the stfiwx instruction">;
|
"Enable the stfiwx instruction">;
|
||||||
|
def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
|
||||||
|
"Enable the lfiwax instruction">;
|
||||||
def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
|
def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
|
||||||
"Enable the fri[mnpz] instructions">;
|
"Enable the fri[mnpz] instructions">;
|
||||||
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
|
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
|
||||||
|
@ -80,7 +82,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
|
||||||
// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
|
// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
|
||||||
// FRE p5 through p7 fre (vs. fres, available since p3)
|
// FRE p5 through p7 fre (vs. fres, available since p3)
|
||||||
// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
|
// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
|
||||||
// LFIWAX p6, p6x, p7 lfiwax
|
|
||||||
// LFIWZX p7 lfiwzx
|
// LFIWZX p7 lfiwzx
|
||||||
// POPCNTB p5 through p7 popcntb and related instructions
|
// POPCNTB p5 through p7 popcntb and related instructions
|
||||||
// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates
|
// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates
|
||||||
|
@ -133,14 +134,15 @@ def : ProcessorModel<"e5500", PPCE5500Model,
|
||||||
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
|
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
|
||||||
def : Processor<"a2", PPCA2Itineraries,
|
def : Processor<"a2", PPCA2Itineraries,
|
||||||
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
||||||
FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
|
FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
|
||||||
FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
|
FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
|
||||||
Feature64Bit /*, Feature64BitRegs */]>;
|
FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
|
||||||
def : Processor<"a2q", PPCA2Itineraries,
|
def : Processor<"a2q", PPCA2Itineraries,
|
||||||
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
||||||
FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
|
FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
|
||||||
FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
|
FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
|
||||||
Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>;
|
FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */,
|
||||||
|
FeatureQPX]>;
|
||||||
def : Processor<"pwr3", G5Itineraries,
|
def : Processor<"pwr3", G5Itineraries,
|
||||||
[DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
|
[DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
|
||||||
FeatureSTFIWX, Feature64Bit]>;
|
FeatureSTFIWX, Feature64Bit]>;
|
||||||
|
@ -157,16 +159,18 @@ def : Processor<"pwr5x", G5Itineraries,
|
||||||
def : Processor<"pwr6", G5Itineraries,
|
def : Processor<"pwr6", G5Itineraries,
|
||||||
[DirectivePwr6, FeatureAltivec,
|
[DirectivePwr6, FeatureAltivec,
|
||||||
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
|
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
|
||||||
FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
|
FeatureLFIWAX, FeatureFPRND, Feature64Bit
|
||||||
|
/*, Feature64BitRegs */]>;
|
||||||
def : Processor<"pwr6x", G5Itineraries,
|
def : Processor<"pwr6x", G5Itineraries,
|
||||||
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
|
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
|
||||||
FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
|
FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
|
||||||
Feature64Bit]>;
|
FeatureFPRND, Feature64Bit]>;
|
||||||
def : Processor<"pwr7", G5Itineraries,
|
def : Processor<"pwr7", G5Itineraries,
|
||||||
[DirectivePwr7, FeatureAltivec,
|
[DirectivePwr7, FeatureAltivec,
|
||||||
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
|
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
|
||||||
FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
|
FeatureLFIWAX, FeatureFPRND, FeatureISEL,
|
||||||
FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
|
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
|
||||||
|
/*, Feature64BitRegs */]>;
|
||||||
def : Processor<"ppc", G3Itineraries, [Directive32]>;
|
def : Processor<"ppc", G3Itineraries, [Directive32]>;
|
||||||
def : Processor<"ppc64", G5Itineraries,
|
def : Processor<"ppc64", G5Itineraries,
|
||||||
[Directive64, FeatureAltivec,
|
[Directive64, FeatureAltivec,
|
||||||
|
|
|
@ -4809,20 +4809,43 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||||
// then lfd it and fcfid it.
|
// then lfd it and fcfid it.
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
|
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
|
||||||
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
|
|
||||||
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||||
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
|
|
||||||
|
|
||||||
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
|
SDValue Ld;
|
||||||
Op.getOperand(0));
|
if (PPCSubTarget.hasLFIWAX()) {
|
||||||
|
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
|
||||||
|
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
|
||||||
|
|
||||||
// STD the extended value into the stack slot.
|
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
|
||||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
|
MachinePointerInfo::getFixedStack(FrameIdx),
|
||||||
MachinePointerInfo(), false, false, 0);
|
false, false, 0);
|
||||||
|
|
||||||
// Load the value as a double.
|
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
|
||||||
SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
|
"Expected an i32 store");
|
||||||
false, false, false, 0);
|
MachineMemOperand *MMO =
|
||||||
|
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
|
||||||
|
MachineMemOperand::MOLoad, 4, 4);
|
||||||
|
SDValue Ops[] = { Store, FIdx };
|
||||||
|
Ld = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
|
||||||
|
DAG.getVTList(MVT::f64, MVT::Other), Ops, 2,
|
||||||
|
MVT::i32, MMO);
|
||||||
|
} else {
|
||||||
|
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
|
||||||
|
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
|
||||||
|
|
||||||
|
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
|
||||||
|
Op.getOperand(0));
|
||||||
|
|
||||||
|
// STD the extended value into the stack slot.
|
||||||
|
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
|
||||||
|
MachinePointerInfo::getFixedStack(FrameIdx),
|
||||||
|
false, false, 0);
|
||||||
|
|
||||||
|
// Load the value as a double.
|
||||||
|
Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
|
||||||
|
MachinePointerInfo::getFixedStack(FrameIdx),
|
||||||
|
false, false, false, 0);
|
||||||
|
}
|
||||||
|
|
||||||
// FCFID it and return it.
|
// FCFID it and return it.
|
||||||
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
|
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
|
||||||
|
|
|
@ -242,6 +242,11 @@ namespace llvm {
|
||||||
/// or i32.
|
/// or i32.
|
||||||
LBRX,
|
LBRX,
|
||||||
|
|
||||||
|
/// F8RC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
|
||||||
|
/// load which sign-extends from a 32-bit integer value into the
|
||||||
|
/// destination 64-bit register.
|
||||||
|
LFIWAX,
|
||||||
|
|
||||||
/// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
|
/// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
|
||||||
/// produces an ADDIS8 instruction that adds the TOC base register to
|
/// produces an ADDIS8 instruction that adds the TOC base register to
|
||||||
/// sym@toc@ha.
|
/// sym@toc@ha.
|
||||||
|
|
|
@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
|
||||||
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
|
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
|
||||||
SDTCisVT<0, f64>, SDTCisPtrTy<1>
|
SDTCisVT<0, f64>, SDTCisPtrTy<1>
|
||||||
]>;
|
]>;
|
||||||
|
def SDT_PPClfiwax : SDTypeProfile<1, 1, [ // lfiwax
|
||||||
|
SDTCisVT<0, f64>, SDTCisPtrTy<1>
|
||||||
|
]>;
|
||||||
|
|
||||||
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
|
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
|
||||||
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
|
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
|
||||||
SDTCisVT<1, i32> ]>;
|
SDTCisVT<1, i32> ]>;
|
||||||
|
@ -63,6 +67,8 @@ def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
|
||||||
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
|
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
|
||||||
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
|
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
|
||||||
[SDNPHasChain, SDNPMayStore]>;
|
[SDNPHasChain, SDNPMayStore]>;
|
||||||
|
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwax,
|
||||||
|
[SDNPHasChain, SDNPMayLoad]>;
|
||||||
|
|
||||||
// Extract FPSCR (not modeled at the DAG level).
|
// Extract FPSCR (not modeled at the DAG level).
|
||||||
def PPCmffs : SDNode<"PPCISD::MFFS",
|
def PPCmffs : SDNode<"PPCISD::MFFS",
|
||||||
|
@ -843,6 +849,10 @@ def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
|
||||||
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
|
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
|
||||||
"lfdx $frD, $src", LdStLFD,
|
"lfdx $frD, $src", LdStLFD,
|
||||||
[(set f64:$frD, (load xaddr:$src))]>;
|
[(set f64:$frD, (load xaddr:$src))]>;
|
||||||
|
|
||||||
|
def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
|
||||||
|
"lfiwax $frD, $src", LdStLFD,
|
||||||
|
[(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -528,6 +528,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
|
|
||||||
bool noImmForm = false;
|
bool noImmForm = false;
|
||||||
switch (OpC) {
|
switch (OpC) {
|
||||||
|
case PPC::LFIWAX:
|
||||||
case PPC::LVEBX:
|
case PPC::LVEBX:
|
||||||
case PPC::LVEHX:
|
case PPC::LVEHX:
|
||||||
case PPC::LVEWX:
|
case PPC::LVEWX:
|
||||||
|
|
|
@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
|
||||||
, HasQPX(false)
|
, HasQPX(false)
|
||||||
, HasFSQRT(false)
|
, HasFSQRT(false)
|
||||||
, HasSTFIWX(false)
|
, HasSTFIWX(false)
|
||||||
|
, HasLFIWAX(false)
|
||||||
, HasFPRND(false)
|
, HasFPRND(false)
|
||||||
, HasISEL(false)
|
, HasISEL(false)
|
||||||
, HasPOPCNTD(false)
|
, HasPOPCNTD(false)
|
||||||
|
|
|
@ -78,6 +78,7 @@ protected:
|
||||||
bool HasQPX;
|
bool HasQPX;
|
||||||
bool HasFSQRT;
|
bool HasFSQRT;
|
||||||
bool HasSTFIWX;
|
bool HasSTFIWX;
|
||||||
|
bool HasLFIWAX;
|
||||||
bool HasFPRND;
|
bool HasFPRND;
|
||||||
bool HasISEL;
|
bool HasISEL;
|
||||||
bool HasPOPCNTD;
|
bool HasPOPCNTD;
|
||||||
|
@ -158,6 +159,7 @@ public:
|
||||||
// Specific obvious features.
|
// Specific obvious features.
|
||||||
bool hasFSQRT() const { return HasFSQRT; }
|
bool hasFSQRT() const { return HasFSQRT; }
|
||||||
bool hasSTFIWX() const { return HasSTFIWX; }
|
bool hasSTFIWX() const { return HasSTFIWX; }
|
||||||
|
bool hasLFIWAX() const { return HasLFIWAX; }
|
||||||
bool hasFPRND() const { return HasFPRND; }
|
bool hasFPRND() const { return HasFPRND; }
|
||||||
bool hasAltivec() const { return HasAltivec; }
|
bool hasAltivec() const { return HasAltivec; }
|
||||||
bool hasQPX() const { return HasQPX; }
|
bool hasQPX() const { return HasQPX; }
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
|
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
|
||||||
|
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
|
||||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||||
target triple = "powerpc64-unknown-linux-gnu"
|
target triple = "powerpc64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -14,6 +15,13 @@ entry:
|
||||||
; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
|
; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
|
||||||
; CHECK: frsp 1, [[REG3]]
|
; CHECK: frsp 1, [[REG3]]
|
||||||
; CHECK: blr
|
; CHECK: blr
|
||||||
|
|
||||||
|
; CHECK-A2: @foo
|
||||||
|
; CHECK-A2: stw 3,
|
||||||
|
; CHECK-A2: lfiwax [[REG:[0-9]+]],
|
||||||
|
; CHECK-A2: fcfid [[REG2:[0-9]+]], [[REG]]
|
||||||
|
; CHECK-A2: frsp 1, [[REG2]]
|
||||||
|
; CHECK-A2: blr
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @goo(i32 %a) nounwind {
|
define double @goo(i32 %a) nounwind {
|
||||||
|
@ -27,5 +35,11 @@ entry:
|
||||||
; CHECK: lfd [[REG2:[0-9]+]],
|
; CHECK: lfd [[REG2:[0-9]+]],
|
||||||
; CHECK: fcfid 1, [[REG2]]
|
; CHECK: fcfid 1, [[REG2]]
|
||||||
; CHECK: blr
|
; CHECK: blr
|
||||||
|
|
||||||
|
; CHECK-A2: @goo
|
||||||
|
; CHECK-A2: stw 3,
|
||||||
|
; CHECK-A2: lfiwax [[REG:[0-9]+]],
|
||||||
|
; CHECK-A2: fcfid 1, [[REG]]
|
||||||
|
; CHECK-A2: blr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue