//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-10-15 07:59:06 +08:00
|
|
|
#include "PPCISelLowering.h"
|
2006-11-17 06:43:37 +08:00
|
|
|
#include "PPCMachineFunctionInfo.h"
|
2006-11-18 06:10:59 +08:00
|
|
|
#include "PPCPredicates.h"
|
2005-10-15 07:59:06 +08:00
|
|
|
#include "PPCTargetMachine.h"
|
2006-04-17 13:28:54 +08:00
|
|
|
#include "PPCPerfectShuffle.h"
|
2007-09-07 12:06:50 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2006-02-01 15:19:44 +08:00
|
|
|
#include "llvm/ADT/VectorExtras.h"
|
2007-03-06 08:59:59 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2005-08-27 05:23:58 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2007-12-31 12:13:23 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2008-02-07 06:27:42 +08:00
|
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2008-04-30 17:16:33 +08:00
|
|
|
#include "llvm/CallingConv.h"
|
2005-08-27 01:36:52 +08:00
|
|
|
#include "llvm/Constants.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
#include "llvm/Function.h"
|
2006-03-26 18:06:40 +08:00
|
|
|
#include "llvm/Intrinsics.h"
|
2006-02-01 15:19:44 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2006-02-18 08:08:58 +08:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2009-07-28 11:13:23 +08:00
|
|
|
#include "llvm/Target/TargetLoweringObjectFile.h"
|
2006-11-10 10:08:47 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2009-07-09 04:53:28 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2009-05-12 03:38:09 +08:00
|
|
|
#include "llvm/DerivedTypes.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
|
2009-07-03 14:45:56 +08:00
|
|
|
CCValAssign::LocInfo &LocInfo,
|
|
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
|
|
CCState &State);
|
2009-08-11 06:56:29 +08:00
|
|
|
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
|
|
|
|
EVT &LocVT,
|
2009-07-03 14:45:56 +08:00
|
|
|
CCValAssign::LocInfo &LocInfo,
|
|
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
|
|
CCState &State);
|
2009-08-11 06:56:29 +08:00
|
|
|
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
|
|
|
|
EVT &LocVT,
|
2009-07-03 14:45:56 +08:00
|
|
|
CCValAssign::LocInfo &LocInfo,
|
|
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
|
|
CCState &State);
|
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
// Hidden command-line flag "-enable-ppc-preinc": opts in to the experimental
// generation of pre-increment loads and stores on PPC.
static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);
|
2006-11-10 10:08:47 +08:00
|
|
|
|
2009-07-28 11:13:23 +08:00
|
|
|
/// CreateTLOF - Allocate the object-file lowering that matches the target:
/// the MachO flavour when the subtarget is Darwin, the ELF flavour otherwise.
/// The result is a raw heap pointer; in this file it is handed straight to
/// the TargetLowering base-class constructor.
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();
  return new TargetLoweringObjectFileELF();
}
|
|
|
|
|
|
|
|
|
2006-11-02 09:44:04 +08:00
|
|
|
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
2009-07-28 11:13:23 +08:00
|
|
|
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2005-10-21 08:02:42 +08:00
|
|
|
setPow2DivIsCheap();
|
2008-08-01 02:13:12 +08:00
|
|
|
|
2005-09-28 06:18:25 +08:00
|
|
|
// Use _setjmp/_longjmp instead of setjmp/longjmp.
|
2006-12-11 07:12:42 +08:00
|
|
|
setUseUnderscoreSetJmp(true);
|
|
|
|
setUseUnderscoreLongJmp(true);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// Set up the register classes.
|
2009-08-12 04:47:22 +08:00
|
|
|
addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
|
|
|
|
addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
|
|
|
|
addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-10-04 08:56:09 +08:00
|
|
|
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
|
2009-08-12 04:47:22 +08:00
|
|
|
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
|
|
|
|
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
|
2008-01-24 04:39:46 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-11 07:58:45 +08:00
|
|
|
// PowerPC has pre-inc load and store's.
|
2009-08-12 04:47:22 +08:00
|
|
|
setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
|
|
|
|
setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
|
|
|
|
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
|
|
|
|
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
|
|
|
|
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
|
|
|
|
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
|
|
|
|
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
|
|
|
|
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
|
|
|
|
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
|
|
|
|
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
|
2006-11-10 03:11:50 +08:00
|
|
|
|
2007-10-10 09:01:31 +08:00
|
|
|
// This is used in the ppcf128->int sequence. Note it has different semantics
|
|
|
|
// from FP_ROUND: that rounds to nearest, this rounds to zero.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
|
2007-10-06 09:24:11 +08:00
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// PowerPC has no SREM/UREM instructions
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SREM, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::UREM, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SREM, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::UREM, MVT::i64, Expand);
|
2007-10-09 01:28:24 +08:00
|
|
|
|
|
|
|
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-10-12 07:21:31 +08:00
|
|
|
// We don't support sin/cos/sqrt/fmod/pow
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FSIN , MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FREM , MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FPOW , MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FSIN , MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::FPOW , MVT::f32, Expand);
|
2008-01-19 03:55:37 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// If we're enabling GP optimizations, use hardware square root
|
2005-09-03 02:33:05 +08:00
|
|
|
if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-01-14 11:14:10 +08:00
|
|
|
// PowerPC does not have BSWAP, CTPOP or CTTZ
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
|
|
|
|
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
|
|
|
|
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
|
|
|
|
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
|
|
|
|
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
|
|
|
|
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-01-12 05:21:00 +08:00
|
|
|
// PowerPC does not have ROTR
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
|
|
|
|
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// PowerPC does not have Select
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SELECT, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::f64, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2005-08-27 01:36:52 +08:00
|
|
|
// PowerPC wants to turn select_cc of FP into fsel when possible.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
|
|
|
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
llvm-svn: 25848
2006-01-31 16:17:29 +08:00
|
|
|
|
2006-02-01 15:19:44 +08:00
|
|
|
// PowerPC wants to optimize integer setcc a bit
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SETCC, MVT::i32, Custom);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-17 09:40:33 +08:00
|
|
|
// PowerPC does not have BRCOND which requires SetCC
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
|
2006-10-30 16:02:39 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2005-09-01 05:09:52 +08:00
|
|
|
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
|
2005-09-07 06:03:27 +08:00
|
|
|
|
2005-08-17 08:40:22 +08:00
|
|
|
// PowerPC does not have [U|S]INT_TO_FP
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
|
2005-08-17 08:40:22 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
|
2005-12-23 13:13:35 +08:00
|
|
|
|
2006-04-29 05:56:10 +08:00
|
|
|
// We cannot sextinreg(i1). Expand to shifts.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
|
2007-02-22 22:56:36 +08:00
|
|
|
|
2006-01-05 09:25:28 +08:00
|
|
|
// Support label based line numbers.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
|
|
|
|
|
|
|
// We want to legalize GlobalAddress and ConstantPool nodes into the
|
2005-12-10 10:36:00 +08:00
|
|
|
// appropriate instructions to materialize the address.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-12 01:36:31 +08:00
|
|
|
// TRAP is legal.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::TRAP, MVT::Other, Legal);
|
2008-09-17 08:30:57 +08:00
|
|
|
|
|
|
|
// TRAMPOLINE is custom lowered.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
|
2008-09-17 08:30:57 +08:00
|
|
|
|
2006-01-26 02:21:52 +08:00
|
|
|
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::VASTART , MVT::Other, Custom);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
// VAARG is custom lowered with the 32-bit SVR4 ABI.
|
|
|
|
if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|
|
|
|
&& !TM.getSubtarget<PPCSubtarget>().isPPC64())
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::VAARG, MVT::Other, Custom);
|
2007-04-03 21:59:52 +08:00
|
|
|
else
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::VAARG, MVT::Other, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-01-15 17:02:48 +08:00
|
|
|
// Use the default implementation.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::VAEND , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
|
|
|
|
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
|
|
|
|
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
|
2006-10-18 09:18:48 +08:00
|
|
|
|
2006-03-26 18:06:40 +08:00
|
|
|
// We want to custom lower some of our intrinsics.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-11-08 06:54:33 +08:00
|
|
|
// Comparisons that require checking two conditions.
|
2009-08-12 04:47:22 +08:00
|
|
|
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
|
|
|
|
setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
|
|
|
|
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
|
|
|
|
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-06-17 01:34:12 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
|
2005-10-18 08:28:58 +08:00
|
|
|
// They also have instructions for converting between i64 and fp.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
|
|
|
|
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
|
2009-06-05 04:53:52 +08:00
|
|
|
// This is just the low 32 bits of a (signed) fp->i64 conversion.
|
|
|
|
// We cannot do this with Promote because i64 is not a legal type.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-24 15:53:47 +08:00
|
|
|
// FIXME: disable this lowered code. This generates 64-bit register values,
|
|
|
|
// and we don't model the fact that the top part is clobbered by calls. We
|
|
|
|
// need to flag these together so that the value isn't live across a call.
|
2009-08-12 04:47:22 +08:00
|
|
|
//setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
|
2005-10-26 07:48:36 +08:00
|
|
|
} else {
|
2005-11-17 15:30:41 +08:00
|
|
|
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
|
2005-10-18 08:56:42 +08:00
|
|
|
}
|
|
|
|
|
2006-06-17 01:34:12 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
|
2007-10-19 12:08:28 +08:00
|
|
|
// 64-bit PowerPC implementations can support i64 types directly
|
2009-08-12 04:47:22 +08:00
|
|
|
addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
|
2005-10-18 08:28:58 +08:00
|
|
|
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
|
2008-03-08 04:36:53 +08:00
|
|
|
// 64-bit PowerPC wants to expand i128 shifts itself.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
|
2005-10-18 08:28:58 +08:00
|
|
|
} else {
|
2007-10-19 12:08:28 +08:00
|
|
|
// 32-bit PowerPC wants to expand i64 shifts itself.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
|
2005-09-07 06:03:27 +08:00
|
|
|
}
|
2006-03-01 09:11:20 +08:00
|
|
|
|
2005-11-29 16:17:20 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
|
2006-04-01 03:52:36 +08:00
|
|
|
// First set operation action for all vector types to expand. Then we
|
|
|
|
// will selectively turn on ones that can be effectively codegen'd.
|
2009-08-12 04:47:22 +08:00
|
|
|
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
|
|
|
|
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
|
|
|
|
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
|
2008-06-06 20:08:01 +08:00
|
|
|
|
2006-04-16 09:37:57 +08:00
|
|
|
// add/sub are legal for all supported vector VT's.
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::ADD , VT, Legal);
|
|
|
|
setOperationAction(ISD::SUB , VT, Legal);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-05 01:25:31 +08:00
|
|
|
// We promote all shuffles to v16i8.
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
|
2006-04-16 09:37:57 +08:00
|
|
|
|
|
|
|
// We promote all non-typed operations to v4i32.
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::AND , VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::AND , VT, MVT::v4i32);
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::OR , VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::OR , VT, MVT::v4i32);
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::XOR , VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::LOAD , VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::SELECT, VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::STORE, VT, Promote);
|
2009-08-12 04:47:22 +08:00
|
|
|
AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-16 09:37:57 +08:00
|
|
|
// No other operations are legal.
|
2008-06-06 20:08:01 +08:00
|
|
|
setOperationAction(ISD::MUL , VT, Expand);
|
|
|
|
setOperationAction(ISD::SDIV, VT, Expand);
|
|
|
|
setOperationAction(ISD::SREM, VT, Expand);
|
|
|
|
setOperationAction(ISD::UDIV, VT, Expand);
|
|
|
|
setOperationAction(ISD::UREM, VT, Expand);
|
|
|
|
setOperationAction(ISD::FDIV, VT, Expand);
|
|
|
|
setOperationAction(ISD::FNEG, VT, Expand);
|
|
|
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
|
|
|
|
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
|
|
|
|
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
|
|
|
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
|
|
|
setOperationAction(ISD::UDIVREM, VT, Expand);
|
|
|
|
setOperationAction(ISD::SDIVREM, VT, Expand);
|
|
|
|
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
|
|
|
|
setOperationAction(ISD::FPOW, VT, Expand);
|
|
|
|
setOperationAction(ISD::CTPOP, VT, Expand);
|
|
|
|
setOperationAction(ISD::CTLZ, VT, Expand);
|
|
|
|
setOperationAction(ISD::CTTZ, VT, Expand);
|
2006-04-01 03:52:36 +08:00
|
|
|
}
|
|
|
|
|
2006-04-05 01:25:31 +08:00
|
|
|
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
|
|
|
|
// with merges, splats, etc.
|
2009-08-12 04:47:22 +08:00
|
|
|
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
|
|
|
|
|
|
|
|
setOperationAction(ISD::AND , MVT::v4i32, Legal);
|
|
|
|
setOperationAction(ISD::OR , MVT::v4i32, Legal);
|
|
|
|
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
|
|
|
|
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
|
|
|
|
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
|
|
|
|
|
|
|
|
addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
|
|
|
|
addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
|
|
|
|
addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
|
|
|
|
addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
|
|
|
|
|
|
|
|
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
|
|
|
|
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
|
|
|
|
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
|
|
|
|
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
|
|
|
|
|
|
|
|
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
|
|
|
|
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
|
|
|
|
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
2005-11-29 16:17:20 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
setShiftAmountType(MVT::i32);
|
2008-11-23 23:47:28 +08:00
|
|
|
setBooleanContents(ZeroOrOneBooleanContent);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-02-22 22:56:36 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
|
2006-10-18 09:20:43 +08:00
|
|
|
setStackPointerRegisterToSaveRestore(PPC::X1);
|
2007-02-22 22:56:36 +08:00
|
|
|
setExceptionPointerRegister(PPC::X3);
|
|
|
|
setExceptionSelectorRegister(PPC::X4);
|
|
|
|
} else {
|
2006-10-18 09:20:43 +08:00
|
|
|
setStackPointerRegisterToSaveRestore(PPC::R1);
|
2007-02-22 22:56:36 +08:00
|
|
|
setExceptionPointerRegister(PPC::R3);
|
|
|
|
setExceptionSelectorRegister(PPC::R4);
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-01 12:57:39 +08:00
|
|
|
// We have target-specific dag combine patterns for the following nodes:
|
|
|
|
setTargetDAGCombine(ISD::SINT_TO_FP);
|
2006-03-01 13:50:56 +08:00
|
|
|
setTargetDAGCombine(ISD::STORE);
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
setTargetDAGCombine(ISD::BR_CC);
|
2006-07-11 04:56:58 +08:00
|
|
|
setTargetDAGCombine(ISD::BSWAP);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-10-19 08:59:18 +08:00
|
|
|
// Darwin long double math library functions have $LDBL128 appended.
|
|
|
|
if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
|
2008-01-10 18:28:30 +08:00
|
|
|
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
|
2007-10-19 08:59:18 +08:00
|
|
|
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
|
|
|
|
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
|
2008-01-10 18:28:30 +08:00
|
|
|
setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
|
|
|
|
setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
|
2008-09-04 08:47:13 +08:00
|
|
|
setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
|
|
|
|
setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
|
|
|
|
setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
|
|
|
|
setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
|
|
|
|
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
|
2007-10-19 08:59:18 +08:00
|
|
|
}
|
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
computeRegisterProperties();
|
|
|
|
}
|
|
|
|
|
2008-02-29 06:31:51 +08:00
|
|
|
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
|
|
|
|
/// function arguments in the caller parameter area.
|
|
|
|
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
|
|
|
TargetMachine &TM = getTargetMachine();
|
|
|
|
// Darwin passes everything on 4 byte boundary.
|
|
|
|
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
|
|
|
|
return 4;
|
2009-07-03 14:47:08 +08:00
|
|
|
// FIXME SVR4 TBD
|
2008-02-29 06:31:51 +08:00
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
2006-01-10 07:52:17 +08:00
|
|
|
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|
|
|
switch (Opcode) {
|
|
|
|
default: return 0;
|
2008-07-12 10:23:19 +08:00
|
|
|
case PPCISD::FSEL: return "PPCISD::FSEL";
|
|
|
|
case PPCISD::FCFID: return "PPCISD::FCFID";
|
|
|
|
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
|
|
|
|
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
|
|
|
|
case PPCISD::STFIWX: return "PPCISD::STFIWX";
|
|
|
|
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
|
|
|
|
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
|
|
|
|
case PPCISD::VPERM: return "PPCISD::VPERM";
|
|
|
|
case PPCISD::Hi: return "PPCISD::Hi";
|
|
|
|
case PPCISD::Lo: return "PPCISD::Lo";
|
2009-08-15 19:54:46 +08:00
|
|
|
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
|
2008-07-12 10:23:19 +08:00
|
|
|
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
|
|
|
|
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
|
|
|
case PPCISD::SRL: return "PPCISD::SRL";
|
|
|
|
case PPCISD::SRA: return "PPCISD::SRA";
|
|
|
|
case PPCISD::SHL: return "PPCISD::SHL";
|
|
|
|
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
|
|
|
|
case PPCISD::STD_32: return "PPCISD::STD_32";
|
2009-07-03 14:47:08 +08:00
|
|
|
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
|
|
|
|
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
|
2009-08-15 19:54:46 +08:00
|
|
|
case PPCISD::NOP: return "PPCISD::NOP";
|
2008-07-12 10:23:19 +08:00
|
|
|
case PPCISD::MTCTR: return "PPCISD::MTCTR";
|
2009-07-03 14:47:08 +08:00
|
|
|
case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
|
|
|
|
case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
|
2008-07-12 10:23:19 +08:00
|
|
|
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
|
|
|
|
case PPCISD::MFCR: return "PPCISD::MFCR";
|
|
|
|
case PPCISD::VCMP: return "PPCISD::VCMP";
|
|
|
|
case PPCISD::VCMPo: return "PPCISD::VCMPo";
|
|
|
|
case PPCISD::LBRX: return "PPCISD::LBRX";
|
|
|
|
case PPCISD::STBRX: return "PPCISD::STBRX";
|
|
|
|
case PPCISD::LARX: return "PPCISD::LARX";
|
|
|
|
case PPCISD::STCX: return "PPCISD::STCX";
|
|
|
|
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
|
|
|
|
case PPCISD::MFFS: return "PPCISD::MFFS";
|
|
|
|
case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
|
|
|
|
case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
|
|
|
|
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
|
|
|
|
case PPCISD::MTFSF: return "PPCISD::MTFSF";
|
|
|
|
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
|
2006-01-10 07:52:17 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
/// getSetCCResultType - Return the value type produced by a SETCC node.
/// The operand type \p VT is ignored; the result is always MVT::i32.
MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i32;
}
|
|
|
|
|
2009-07-02 02:50:55 +08:00
|
|
|
/// getFunctionAlignment - Return the Log2 alignment of this function.
|
2009-07-01 06:38:32 +08:00
|
|
|
unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const {
|
|
|
|
if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin())
|
|
|
|
return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4;
|
|
|
|
else
|
|
|
|
return 2;
|
|
|
|
}
|
2008-03-10 23:42:14 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Node matching predicates, for use by the tblgen matching code.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-08-27 01:36:52 +08:00
|
|
|
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
|
2008-07-28 05:46:04 +08:00
|
|
|
static bool isFloatingPointZero(SDValue Op) {
|
2005-08-27 01:36:52 +08:00
|
|
|
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
|
2007-08-31 12:03:46 +08:00
|
|
|
return CFP->getValueAPF().isZero();
|
2008-08-29 05:40:38 +08:00
|
|
|
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
|
2005-08-27 01:36:52 +08:00
|
|
|
// Maybe this has already been legalized into the constant pool?
|
|
|
|
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
|
2006-09-13 05:04:05 +08:00
|
|
|
if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
|
2007-08-31 12:03:46 +08:00
|
|
|
return CFP->getValueAPF().isZero();
|
2005-08-27 01:36:52 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-04-07 01:23:16 +08:00
|
|
|
/// isConstantOrUndef - Op is a single shuffle-mask element, where a negative
/// value stands for an undef element.  Return true if Op is undef (negative)
/// or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
|
|
|
|
|
|
|
|
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
|
|
|
|
/// VPKUHUM instruction.
|
2009-04-28 02:41:29 +08:00
|
|
|
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
|
2006-04-07 06:28:36 +08:00
|
|
|
if (!isUnary) {
|
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
|
2006-04-07 06:28:36 +08:00
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
for (unsigned i = 0; i != 8; ++i)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
|
|
|
|
!isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
|
2006-04-07 06:28:36 +08:00
|
|
|
return false;
|
|
|
|
}
|
2006-04-07 02:26:28 +08:00
|
|
|
return true;
|
2006-04-07 01:23:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
|
|
|
|
/// VPKUWUM instruction.
|
2009-04-28 02:41:29 +08:00
|
|
|
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
|
2006-04-07 06:28:36 +08:00
|
|
|
if (!isUnary) {
|
|
|
|
for (unsigned i = 0; i != 16; i += 2)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
|
|
|
|
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
|
2006-04-07 06:28:36 +08:00
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
for (unsigned i = 0; i != 8; i += 2)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
|
|
|
|
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
|
|
|
|
!isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
|
|
|
|
!isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
|
2006-04-07 06:28:36 +08:00
|
|
|
return false;
|
|
|
|
}
|
2006-04-07 02:26:28 +08:00
|
|
|
return true;
|
2006-04-07 01:23:16 +08:00
|
|
|
}
|
|
|
|
|
2006-04-07 06:02:42 +08:00
|
|
|
/// isVMerge - Common function, used to match vmrg* shuffles.
|
|
|
|
///
|
2009-04-28 02:41:29 +08:00
|
|
|
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
|
2006-04-07 06:02:42 +08:00
|
|
|
unsigned LHSStart, unsigned RHSStart) {
|
2009-08-12 04:47:22 +08:00
|
|
|
assert(N->getValueType(0) == MVT::v16i8 &&
|
2009-04-28 02:41:29 +08:00
|
|
|
"PPC only supports shuffles by bytes!");
|
2006-04-07 05:11:54 +08:00
|
|
|
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
|
|
|
|
"Unsupported merge size!");
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-07 05:11:54 +08:00
|
|
|
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
|
|
|
|
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
|
2006-04-07 06:02:42 +08:00
|
|
|
LHSStart+j+i*UnitSize) ||
|
2009-04-28 02:41:29 +08:00
|
|
|
!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
|
2006-04-07 06:02:42 +08:00
|
|
|
RHSStart+j+i*UnitSize))
|
2006-04-07 05:11:54 +08:00
|
|
|
return false;
|
|
|
|
}
|
2009-04-28 02:41:29 +08:00
|
|
|
return true;
|
2006-04-07 06:02:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
|
|
|
|
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
|
2009-04-28 02:41:29 +08:00
|
|
|
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
|
|
|
bool isUnary) {
|
2006-04-07 06:02:42 +08:00
|
|
|
if (!isUnary)
|
|
|
|
return isVMerge(N, UnitSize, 8, 24);
|
|
|
|
return isVMerge(N, UnitSize, 8, 8);
|
2006-04-07 05:11:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
|
|
|
|
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
|
2009-04-28 02:41:29 +08:00
|
|
|
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
|
|
|
bool isUnary) {
|
2006-04-07 06:02:42 +08:00
|
|
|
if (!isUnary)
|
|
|
|
return isVMerge(N, UnitSize, 0, 16);
|
|
|
|
return isVMerge(N, UnitSize, 0, 0);
|
2006-04-07 05:11:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-04-07 02:26:28 +08:00
|
|
|
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
|
|
|
|
/// amount, otherwise return -1.
|
2006-04-07 06:28:36 +08:00
|
|
|
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
|
2009-08-12 04:47:22 +08:00
|
|
|
assert(N->getValueType(0) == MVT::v16i8 &&
|
2009-04-28 02:41:29 +08:00
|
|
|
"PPC only supports shuffles by bytes!");
|
|
|
|
|
|
|
|
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
|
|
|
|
|
2006-04-07 02:26:28 +08:00
|
|
|
// Find the first non-undef value in the shuffle mask.
|
|
|
|
unsigned i;
|
2009-04-28 02:41:29 +08:00
|
|
|
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
|
2006-04-07 02:26:28 +08:00
|
|
|
/*search*/;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-07 02:26:28 +08:00
|
|
|
if (i == 16) return -1; // all undef.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-04-28 02:41:29 +08:00
|
|
|
// Otherwise, check to see if the rest of the elements are consecutively
|
2006-04-07 02:26:28 +08:00
|
|
|
// numbered from this value.
|
2009-04-28 02:41:29 +08:00
|
|
|
unsigned ShiftAmt = SVOp->getMaskElt(i);
|
2006-04-07 02:26:28 +08:00
|
|
|
if (ShiftAmt < i) return -1;
|
|
|
|
ShiftAmt -= i;
|
|
|
|
|
2006-04-07 06:28:36 +08:00
|
|
|
if (!isUnary) {
|
2009-04-28 02:41:29 +08:00
|
|
|
// Check the rest of the elements to see if they are consecutive.
|
2006-04-07 06:28:36 +08:00
|
|
|
for (++i; i != 16; ++i)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
|
2006-04-07 06:28:36 +08:00
|
|
|
return -1;
|
|
|
|
} else {
|
2009-04-28 02:41:29 +08:00
|
|
|
// Check the rest of the elements to see if they are consecutive.
|
2006-04-07 06:28:36 +08:00
|
|
|
for (++i; i != 16; ++i)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
|
2006-04-07 06:28:36 +08:00
|
|
|
return -1;
|
|
|
|
}
|
2006-04-07 02:26:28 +08:00
|
|
|
return ShiftAmt;
|
|
|
|
}
|
2006-03-20 14:33:01 +08:00
|
|
|
|
|
|
|
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
|
|
|
|
/// specifies a splat of a single element that is suitable for input to
|
|
|
|
/// VSPLTB/VSPLTH/VSPLTW.
|
2009-04-28 02:41:29 +08:00
|
|
|
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
|
2009-08-12 04:47:22 +08:00
|
|
|
assert(N->getValueType(0) == MVT::v16i8 &&
|
2006-04-05 01:25:31 +08:00
|
|
|
(EltSize == 1 || EltSize == 2 || EltSize == 4));
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-20 14:37:44 +08:00
|
|
|
// This is a splat operation if each element of the permute is the same, and
|
|
|
|
// if the value doesn't reference the second vector.
|
2009-04-28 02:41:29 +08:00
|
|
|
unsigned ElementBase = N->getMaskElt(0);
|
|
|
|
|
|
|
|
// FIXME: Handle UNDEF elements too!
|
|
|
|
if (ElementBase >= 16)
|
2006-04-05 01:25:31 +08:00
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-04-28 02:41:29 +08:00
|
|
|
// Check that the indices are consecutive, in the case of a multi-byte element
|
|
|
|
// splatted with a v16i8 mask.
|
|
|
|
for (unsigned i = 1; i != EltSize; ++i)
|
|
|
|
if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
|
2006-04-05 01:25:31 +08:00
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-05 01:25:31 +08:00
|
|
|
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
|
2009-04-28 02:41:29 +08:00
|
|
|
if (N->getMaskElt(i) < 0) continue;
|
2006-04-05 01:25:31 +08:00
|
|
|
for (unsigned j = 0; j != EltSize; ++j)
|
2009-04-28 02:41:29 +08:00
|
|
|
if (N->getMaskElt(i+j) != N->getMaskElt(j))
|
2006-04-05 01:25:31 +08:00
|
|
|
return false;
|
2006-03-20 14:37:44 +08:00
|
|
|
}
|
2006-04-05 01:25:31 +08:00
|
|
|
return true;
|
2006-03-20 14:33:01 +08:00
|
|
|
}
|
|
|
|
|
2007-07-30 15:51:22 +08:00
|
|
|
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
|
|
|
|
/// are -0.0.
|
|
|
|
bool PPC::isAllNegativeZeroVector(SDNode *N) {
|
2009-04-28 02:41:29 +08:00
|
|
|
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
|
|
|
|
|
|
|
|
APInt APVal, APUndef;
|
|
|
|
unsigned BitSize;
|
|
|
|
bool HasAnyUndefs;
|
|
|
|
|
|
|
|
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
|
|
|
|
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
|
2007-08-31 12:03:46 +08:00
|
|
|
return CFP->getValueAPF().isNegZero();
|
2009-04-28 02:41:29 +08:00
|
|
|
|
2007-07-30 15:51:22 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-03-20 14:33:01 +08:00
|
|
|
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
|
|
|
|
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
|
2006-04-05 01:25:31 +08:00
|
|
|
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
|
2009-04-28 02:41:29 +08:00
|
|
|
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
|
|
|
|
assert(isSplatShuffleMask(SVOp, EltSize));
|
|
|
|
return SVOp->getMaskElt(0) / EltSize;
|
2006-03-20 14:33:01 +08:00
|
|
|
}
|
|
|
|
|
2006-04-13 01:37:20 +08:00
|
|
|
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
|
2006-04-08 14:46:53 +08:00
|
|
|
/// by using a vspltis[bhw] instruction of the specified element size, return
|
|
|
|
/// the constant being splatted. The ByteSize field indicates the number of
|
|
|
|
/// bytes of each element [124] -> [bhw].
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
|
|
|
|
SDValue OpVal(0, 0);
|
2006-04-08 15:14:26 +08:00
|
|
|
|
|
|
|
// If ByteSize of the splat is bigger than the element size of the
|
|
|
|
// build_vector, then we have a case where we are checking for a splat where
|
|
|
|
// multiple elements of the buildvector are folded together into a single
|
|
|
|
// logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
|
|
|
|
unsigned EltSize = 16/N->getNumOperands();
|
|
|
|
if (EltSize < ByteSize) {
|
|
|
|
unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue UniquedVals[4];
|
2006-04-08 15:14:26 +08:00
|
|
|
assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-08 15:14:26 +08:00
|
|
|
// See if all of the elements in the buildvector agree across.
|
|
|
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
|
|
|
|
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
|
|
|
|
// If the element isn't a constant, bail fully out.
|
2008-07-28 05:46:04 +08:00
|
|
|
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
|
2006-04-08 15:14:26 +08:00
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-29 05:40:38 +08:00
|
|
|
if (UniquedVals[i&(Multiple-1)].getNode() == 0)
|
2006-04-08 15:14:26 +08:00
|
|
|
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
|
|
|
|
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(); // no match.
|
2006-04-08 15:14:26 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-08 15:14:26 +08:00
|
|
|
// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
|
|
|
|
// either constant or undef values that are identical for each chunk. See
|
|
|
|
// if these chunks can form into a larger vspltis*.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-08 15:14:26 +08:00
|
|
|
// Check to see if all of the leading entries are either 0 or -1. If
|
|
|
|
// neither, then this won't fit into the immediate field.
|
|
|
|
bool LeadingZero = true;
|
|
|
|
bool LeadingOnes = true;
|
|
|
|
for (unsigned i = 0; i != Multiple-1; ++i) {
|
2008-08-29 05:40:38 +08:00
|
|
|
if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-08 15:14:26 +08:00
|
|
|
LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
|
|
|
|
LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
|
|
|
|
}
|
|
|
|
// Finally, check the least significant entry.
|
|
|
|
if (LeadingZero) {
|
2008-08-29 05:40:38 +08:00
|
|
|
if (UniquedVals[Multiple-1].getNode() == 0)
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
|
2008-09-13 00:56:44 +08:00
|
|
|
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
|
2006-04-08 15:14:26 +08:00
|
|
|
if (Val < 16)
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
|
2006-04-08 15:14:26 +08:00
|
|
|
}
|
|
|
|
if (LeadingOnes) {
|
2008-08-29 05:40:38 +08:00
|
|
|
if (UniquedVals[Multiple-1].getNode() == 0)
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
|
2008-09-27 05:54:37 +08:00
|
|
|
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
|
2006-04-08 15:14:26 +08:00
|
|
|
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getTargetConstant(Val, MVT::i32);
|
2006-04-08 15:14:26 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2006-04-08 15:14:26 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-25 14:12:06 +08:00
|
|
|
// Check to see if this buildvec has a single non-undef value in its elements.
|
|
|
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
|
|
|
|
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
|
2008-08-29 05:40:38 +08:00
|
|
|
if (OpVal.getNode() == 0)
|
2006-03-25 14:12:06 +08:00
|
|
|
OpVal = N->getOperand(i);
|
|
|
|
else if (OpVal != N->getOperand(i))
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2006-03-25 14:12:06 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-29 05:40:38 +08:00
|
|
|
if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-05-24 10:03:36 +08:00
|
|
|
unsigned ValSizeInBytes = EltSize;
|
2006-03-28 12:15:58 +08:00
|
|
|
uint64_t Value = 0;
|
2006-03-25 14:12:06 +08:00
|
|
|
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
|
2008-09-13 00:56:44 +08:00
|
|
|
Value = CN->getZExtValue();
|
2006-03-25 14:12:06 +08:00
|
|
|
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
|
2009-08-12 04:47:22 +08:00
|
|
|
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
|
2007-08-31 12:03:46 +08:00
|
|
|
Value = FloatToBits(CN->getValueAPF().convertToFloat());
|
2006-03-25 14:12:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If the splat value is larger than the element value, then we can never do
|
|
|
|
// this splat. The only case that we could fit the replicated bits into our
|
|
|
|
// immediate field for would be zero, and we prefer to use vxor for it.
|
2008-07-28 05:46:04 +08:00
|
|
|
if (ValSizeInBytes < ByteSize) return SDValue();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-25 14:12:06 +08:00
|
|
|
// If the element value is larger than the splat value, cut it in half and
|
|
|
|
// check to see if the two halves are equal. Continue doing this until we
|
|
|
|
// get to ByteSize. This allows us to handle 0x01010101 as 0x01.
|
|
|
|
while (ValSizeInBytes > ByteSize) {
|
|
|
|
ValSizeInBytes >>= 1;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-25 14:12:06 +08:00
|
|
|
// If the top half equals the bottom half, we're still ok.
|
2006-04-06 01:39:25 +08:00
|
|
|
if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
|
|
|
|
(Value & ((1 << (8*ValSizeInBytes))-1)))
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2006-03-25 14:12:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Properly sign extend the value.
|
|
|
|
int ShAmt = (4-ByteSize)*8;
|
|
|
|
int MaskVal = ((int)Value << ShAmt) >> ShAmt;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-26 17:52:32 +08:00
|
|
|
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
|
2008-07-28 05:46:04 +08:00
|
|
|
if (MaskVal == 0) return SDValue();
|
2006-03-25 14:12:06 +08:00
|
|
|
|
2006-04-08 14:46:53 +08:00
|
|
|
// Finally, if this value fits in a 5 bit sext field, return it
|
|
|
|
if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getTargetConstant(MaskVal, MVT::i32);
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2006-03-25 14:12:06 +08:00
|
|
|
}
|
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Addressing Mode Selection
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
|
|
|
|
/// or 64-bit immediate, and if the value can be accurately represented as a
|
|
|
|
/// sign extension from a 16-bit value. If so, this returns true and the
|
|
|
|
/// immediate.
|
|
|
|
static bool isIntS16Immediate(SDNode *N, short &Imm) {
|
|
|
|
if (N->getOpcode() != ISD::Constant)
|
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-09-13 00:56:44 +08:00
|
|
|
Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
|
2009-08-12 04:47:22 +08:00
|
|
|
if (N->getValueType(0) == MVT::i32)
|
2008-09-13 00:56:44 +08:00
|
|
|
return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
|
2006-11-08 10:15:41 +08:00
|
|
|
else
|
2008-09-13 00:56:44 +08:00
|
|
|
return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
|
2006-11-08 10:15:41 +08:00
|
|
|
}
|
2008-07-28 05:46:04 +08:00
|
|
|
/// isIntS16Immediate - SDValue convenience overload; applies the SDNode
/// version to the node that Op refers to.
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  SDNode *Node = Op.getNode();
  return isIntS16Immediate(Node, Imm);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// SelectAddressRegReg - Given the specified addressed, check to see if it
|
|
|
|
/// can be represented as an indexed [r+r] operation. Returns false if it
|
|
|
|
/// can be more efficiently represented with [r+imm].
|
2008-07-28 05:46:04 +08:00
|
|
|
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
|
|
|
|
SDValue &Index,
|
2009-01-16 00:29:45 +08:00
|
|
|
SelectionDAG &DAG) const {
|
2006-11-08 10:15:41 +08:00
|
|
|
short imm = 0;
|
|
|
|
if (N.getOpcode() == ISD::ADD) {
|
|
|
|
if (isIntS16Immediate(N.getOperand(1), imm))
|
|
|
|
return false; // r+i
|
|
|
|
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
|
|
|
|
return false; // r+i
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
Base = N.getOperand(0);
|
|
|
|
Index = N.getOperand(1);
|
|
|
|
return true;
|
|
|
|
} else if (N.getOpcode() == ISD::OR) {
|
|
|
|
if (isIntS16Immediate(N.getOperand(1), imm))
|
|
|
|
return false; // r+i can fold it if we can.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
// If this is an or of disjoint bitfields, we can codegen this as an add
|
|
|
|
// (for better address arithmetic) if the LHS and RHS of the OR are provably
|
|
|
|
// disjoint.
|
2008-02-27 09:23:58 +08:00
|
|
|
APInt LHSKnownZero, LHSKnownOne;
|
|
|
|
APInt RHSKnownZero, RHSKnownOne;
|
|
|
|
DAG.ComputeMaskedBits(N.getOperand(0),
|
2008-02-28 05:12:32 +08:00
|
|
|
APInt::getAllOnesValue(N.getOperand(0)
|
|
|
|
.getValueSizeInBits()),
|
2008-02-27 09:23:58 +08:00
|
|
|
LHSKnownZero, LHSKnownOne);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-02-27 09:23:58 +08:00
|
|
|
if (LHSKnownZero.getBoolValue()) {
|
|
|
|
DAG.ComputeMaskedBits(N.getOperand(1),
|
2008-02-28 05:12:32 +08:00
|
|
|
APInt::getAllOnesValue(N.getOperand(1)
|
|
|
|
.getValueSizeInBits()),
|
2008-02-27 09:23:58 +08:00
|
|
|
RHSKnownZero, RHSKnownOne);
|
2006-11-08 10:15:41 +08:00
|
|
|
// If all of the bits are known zero on the LHS or RHS, the add won't
|
|
|
|
// carry.
|
2008-02-28 05:12:32 +08:00
|
|
|
if (~(LHSKnownZero | RHSKnownZero) == 0) {
|
2006-11-08 10:15:41 +08:00
|
|
|
Base = N.getOperand(0);
|
|
|
|
Index = N.getOperand(1);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if the address N can be represented by a base register plus
|
|
|
|
/// a signed 16-bit displacement [r+imm], and if it is not better
|
|
|
|
/// represented as reg+reg.
|
2008-07-28 05:46:04 +08:00
|
|
|
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
|
2009-01-16 00:29:45 +08:00
|
|
|
SDValue &Base,
|
|
|
|
SelectionDAG &DAG) const {
|
2009-02-07 03:16:40 +08:00
|
|
|
// FIXME dl should come from parent load or store, not from address
|
|
|
|
DebugLoc dl = N.getDebugLoc();
|
2006-11-08 10:15:41 +08:00
|
|
|
// If this can be more profitably realized as r+r, fail.
|
|
|
|
if (SelectAddressRegReg(N, Disp, Base, DAG))
|
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
if (N.getOpcode() == ISD::ADD) {
|
|
|
|
short imm = 0;
|
|
|
|
if (isIntS16Immediate(N.getOperand(1), imm)) {
|
2009-08-12 04:47:22 +08:00
|
|
|
Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
|
2006-11-08 10:15:41 +08:00
|
|
|
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
|
|
|
|
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
|
|
|
|
} else {
|
|
|
|
Base = N.getOperand(0);
|
|
|
|
}
|
|
|
|
return true; // [r+i]
|
|
|
|
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
|
|
|
|
// Match LOAD (ADD (X, Lo(G))).
|
2008-09-13 00:56:44 +08:00
|
|
|
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
|
2006-11-08 10:15:41 +08:00
|
|
|
&& "Cannot handle constant offsets yet!");
|
|
|
|
Disp = N.getOperand(1).getOperand(0); // The global address.
|
|
|
|
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
|
|
|
|
Disp.getOpcode() == ISD::TargetConstantPool ||
|
|
|
|
Disp.getOpcode() == ISD::TargetJumpTable);
|
|
|
|
Base = N.getOperand(0);
|
|
|
|
return true; // [&g+r]
|
|
|
|
}
|
|
|
|
} else if (N.getOpcode() == ISD::OR) {
|
|
|
|
short imm = 0;
|
|
|
|
if (isIntS16Immediate(N.getOperand(1), imm)) {
|
|
|
|
// If this is an or of disjoint bitfields, we can codegen this as an add
|
|
|
|
// (for better address arithmetic) if the LHS and RHS of the OR are
|
|
|
|
// provably disjoint.
|
2008-02-27 09:23:58 +08:00
|
|
|
APInt LHSKnownZero, LHSKnownOne;
|
|
|
|
DAG.ComputeMaskedBits(N.getOperand(0),
|
2008-03-25 07:16:37 +08:00
|
|
|
APInt::getAllOnesValue(N.getOperand(0)
|
|
|
|
.getValueSizeInBits()),
|
2008-02-27 09:23:58 +08:00
|
|
|
LHSKnownZero, LHSKnownOne);
|
2008-03-25 07:16:37 +08:00
|
|
|
|
2008-02-27 09:23:58 +08:00
|
|
|
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
|
2006-11-08 10:15:41 +08:00
|
|
|
// If all of the bits are known zero on the LHS or RHS, the add won't
|
|
|
|
// carry.
|
|
|
|
Base = N.getOperand(0);
|
2009-08-12 04:47:22 +08:00
|
|
|
Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
|
2006-11-08 10:15:41 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
|
|
|
|
// Loading from a constant address.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
// If this address fits entirely in a 16-bit sext immediate field, codegen
|
|
|
|
// this as "d, 0"
|
|
|
|
short Imm;
|
|
|
|
if (isIntS16Immediate(CN, Imm)) {
|
|
|
|
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
|
|
|
|
Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
|
|
|
|
return true;
|
|
|
|
}
|
2007-02-17 14:44:03 +08:00
|
|
|
|
|
|
|
// Handle 32-bit sext immediates with LIS + addr mode.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (CN->getValueType(0) == MVT::i32 ||
|
2008-09-13 00:56:44 +08:00
|
|
|
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
|
|
|
|
int Addr = (int)CN->getZExtValue();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
// Otherwise, break this down into an LIS + disp.
|
2009-08-12 04:47:22 +08:00
|
|
|
Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
|
|
|
|
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
|
2009-09-26 02:54:59 +08:00
|
|
|
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
|
2006-11-08 10:15:41 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
Disp = DAG.getTargetConstant(0, getPointerTy());
|
|
|
|
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
|
|
|
|
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
|
|
|
|
else
|
|
|
|
Base = N;
|
|
|
|
return true; // [r+0]
|
|
|
|
}
|
|
|
|
|
|
|
|
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
|
|
|
|
/// represented as an indexed [r+r] operation.
|
2008-07-28 05:46:04 +08:00
|
|
|
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
|
|
|
|
SDValue &Index,
|
2009-01-16 00:29:45 +08:00
|
|
|
SelectionDAG &DAG) const {
|
2006-11-08 10:15:41 +08:00
|
|
|
// Check to see if we can easily represent this as an [r+r] address. This
|
|
|
|
// will fail if it thinks that the address is more profitably represented as
|
|
|
|
// reg+imm, e.g. where imm = 0.
|
|
|
|
if (SelectAddressRegReg(N, Base, Index, DAG))
|
|
|
|
return true;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
// If the operand is an addition, always emit this as [r+r], since this is
|
|
|
|
// better (for code size, and execution, as the memop does the add for free)
|
|
|
|
// than emitting an explicit add.
|
|
|
|
if (N.getOpcode() == ISD::ADD) {
|
|
|
|
Base = N.getOperand(0);
|
|
|
|
Index = N.getOperand(1);
|
|
|
|
return true;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
// Otherwise, do it the hard way, using R0 as the base register.
|
|
|
|
Base = DAG.getRegister(PPC::R0, N.getValueType());
|
|
|
|
Index = N;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// SelectAddressRegImmShift - Returns true if the address N can be
|
|
|
|
/// represented by a base register plus a signed 14-bit displacement
|
|
|
|
/// [r+imm*4]. Suitable for use by STD and friends.
|
2008-07-28 05:46:04 +08:00
|
|
|
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
|
|
|
|
SDValue &Base,
|
2009-01-16 00:29:45 +08:00
|
|
|
SelectionDAG &DAG) const {
|
2009-02-07 03:16:40 +08:00
|
|
|
// FIXME dl should come from the parent load or store, not the address
|
|
|
|
DebugLoc dl = N.getDebugLoc();
|
2006-11-08 10:15:41 +08:00
|
|
|
// If this can be more profitably realized as r+r, fail.
|
|
|
|
if (SelectAddressRegReg(N, Disp, Base, DAG))
|
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
if (N.getOpcode() == ISD::ADD) {
|
|
|
|
short imm = 0;
|
|
|
|
if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
|
2009-08-12 04:47:22 +08:00
|
|
|
Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
|
2006-11-08 10:15:41 +08:00
|
|
|
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
|
|
|
|
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
|
|
|
|
} else {
|
|
|
|
Base = N.getOperand(0);
|
|
|
|
}
|
|
|
|
return true; // [r+i]
|
|
|
|
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
|
|
|
|
// Match LOAD (ADD (X, Lo(G))).
|
2008-09-13 00:56:44 +08:00
|
|
|
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
|
2006-11-08 10:15:41 +08:00
|
|
|
&& "Cannot handle constant offsets yet!");
|
|
|
|
Disp = N.getOperand(1).getOperand(0); // The global address.
|
|
|
|
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
|
|
|
|
Disp.getOpcode() == ISD::TargetConstantPool ||
|
|
|
|
Disp.getOpcode() == ISD::TargetJumpTable);
|
|
|
|
Base = N.getOperand(0);
|
|
|
|
return true; // [&g+r]
|
|
|
|
}
|
|
|
|
} else if (N.getOpcode() == ISD::OR) {
|
|
|
|
short imm = 0;
|
|
|
|
if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
|
|
|
|
// If this is an or of disjoint bitfields, we can codegen this as an add
|
|
|
|
// (for better address arithmetic) if the LHS and RHS of the OR are
|
|
|
|
// provably disjoint.
|
2008-02-27 09:23:58 +08:00
|
|
|
APInt LHSKnownZero, LHSKnownOne;
|
|
|
|
DAG.ComputeMaskedBits(N.getOperand(0),
|
2008-03-25 07:16:37 +08:00
|
|
|
APInt::getAllOnesValue(N.getOperand(0)
|
|
|
|
.getValueSizeInBits()),
|
2008-02-27 09:23:58 +08:00
|
|
|
LHSKnownZero, LHSKnownOne);
|
|
|
|
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
|
2006-11-08 10:15:41 +08:00
|
|
|
// If all of the bits are known zero on the LHS or RHS, the add won't
|
|
|
|
// carry.
|
|
|
|
Base = N.getOperand(0);
|
2009-08-12 04:47:22 +08:00
|
|
|
Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
|
2006-11-08 10:15:41 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
|
2007-02-17 14:57:26 +08:00
|
|
|
// Loading from a constant address. Verify low two bits are clear.
|
2008-09-13 00:56:44 +08:00
|
|
|
if ((CN->getZExtValue() & 3) == 0) {
|
2007-02-17 14:57:26 +08:00
|
|
|
// If this address fits entirely in a 14-bit sext immediate field, codegen
|
|
|
|
// this as "d, 0"
|
|
|
|
short Imm;
|
|
|
|
if (isIntS16Immediate(CN, Imm)) {
|
|
|
|
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
|
|
|
|
Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
|
|
|
|
return true;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-02-17 14:57:26 +08:00
|
|
|
// Fold the low-part of 32-bit absolute addresses into addr mode.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (CN->getValueType(0) == MVT::i32 ||
|
2008-09-13 00:56:44 +08:00
|
|
|
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
|
|
|
|
int Addr = (int)CN->getZExtValue();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-02-17 14:57:26 +08:00
|
|
|
// Otherwise, break this down into an LIS + disp.
|
2009-08-12 04:47:22 +08:00
|
|
|
Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
|
|
|
|
Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
|
|
|
|
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
|
2009-09-26 02:54:59 +08:00
|
|
|
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
|
2007-02-17 14:57:26 +08:00
|
|
|
return true;
|
|
|
|
}
|
2006-11-08 10:15:41 +08:00
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
Disp = DAG.getTargetConstant(0, getPointerTy());
|
|
|
|
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
|
|
|
|
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
|
|
|
|
else
|
|
|
|
Base = N;
|
|
|
|
return true; // [r+0]
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// getPreIndexedAddressParts - returns true by value, base pointer and
|
|
|
|
/// offset pointer and addressing mode by reference if the node's address
|
|
|
|
/// can be legally represented as pre-indexed load / store address.
|
2008-07-28 05:46:04 +08:00
|
|
|
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
|
|
|
|
SDValue &Offset,
|
2006-11-10 01:55:04 +08:00
|
|
|
ISD::MemIndexedMode &AM,
|
2009-01-16 00:29:45 +08:00
|
|
|
SelectionDAG &DAG) const {
|
2006-11-10 10:08:47 +08:00
|
|
|
// Disabled by default for now.
|
|
|
|
if (!EnablePPCPreinc) return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Ptr;
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT;
|
2006-11-08 10:15:41 +08:00
|
|
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
|
|
|
Ptr = LD->getBasePtr();
|
2008-01-30 08:15:11 +08:00
|
|
|
VT = LD->getMemoryVT();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-08 10:15:41 +08:00
|
|
|
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
|
2006-11-10 10:08:47 +08:00
|
|
|
ST = ST;
|
2006-11-14 09:38:31 +08:00
|
|
|
Ptr = ST->getBasePtr();
|
2008-01-30 08:15:11 +08:00
|
|
|
VT = ST->getMemoryVT();
|
2006-11-08 10:15:41 +08:00
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
|
2006-11-14 09:38:31 +08:00
|
|
|
// PowerPC doesn't have preinc load/store instructions for vectors.
|
2008-06-06 20:08:01 +08:00
|
|
|
if (VT.isVector())
|
2006-11-14 09:38:31 +08:00
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-16 03:55:13 +08:00
|
|
|
// TODO: Check reg+reg first.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-16 03:55:13 +08:00
|
|
|
// LDU/STU use reg+imm*4, others use reg+imm.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (VT != MVT::i64) {
|
2006-11-16 03:55:13 +08:00
|
|
|
// reg + imm
|
|
|
|
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
// reg + imm * 4.
|
|
|
|
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
|
|
|
|
return false;
|
|
|
|
}
|
2006-11-11 08:08:42 +08:00
|
|
|
|
|
|
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
2006-11-16 03:55:13 +08:00
|
|
|
// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
|
|
|
|
// sext i32 to i64 when addr mode is r+i.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
|
2006-11-11 08:08:42 +08:00
|
|
|
LD->getExtensionType() == ISD::SEXTLOAD &&
|
|
|
|
isa<ConstantSDNode>(Offset))
|
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
}
|
|
|
|
|
2006-11-10 10:08:47 +08:00
|
|
|
AM = ISD::PRE_INC;
|
|
|
|
return true;
|
2006-11-08 10:15:41 +08:00
|
|
|
}
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// LowerOperation implementation
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
/// LowerConstantPool - Lower a ConstantPool node into an address built from
/// the PPCISD::Hi and PPCISD::Lo parts of the target constant-pool entry.  In
/// PIC mode the global base register is added to the high part first.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *PoolNode = cast<ConstantPoolSDNode>(Op);
  SDValue CPI = DAG.getTargetConstantPool(PoolNode->getConstVal(), PtrVT,
                                          PoolNode->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug info here
  DebugLoc dl = Op.getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  const bool UseDirectAddress =
    TM.getRelocationModel() == Reloc::Static ||
    !TM.getSubtarget<PPCSubtarget>().isDarwin();

  // Non-pic code accesses the constant pool directly: the address of the
  // entry is just (hi(&entry)+lo(&entry)).
  if (UseDirectAddress)
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    SDValue GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg,
                                     DebugLoc::getUnknownLoc(), PtrVT);
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT, GlobalBase, Hi);
  }

  return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerJumpTable - Lower a JumpTable node into an address built from the
/// PPCISD::Hi and PPCISD::Lo parts of the target jump-table index.  In PIC
/// mode the global base register is added to the high part first.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *TableNode = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(TableNode->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug loc here
  DebugLoc dl = Op.getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  const bool UseDirectAddress =
    TM.getRelocationModel() == Reloc::Static ||
    !TM.getSubtarget<PPCSubtarget>().isDarwin();

  // Non-pic code accesses the jump table directly: its address is just
  // (hi(&table)+lo(&table)).
  if (UseDirectAddress)
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    SDValue GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg,
                                     DebugLoc::getUnknownLoc(), PtrVT);
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT, GlobalBase, Hi);
  }

  return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
|
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
/// LowerGlobalTLSAddress - Placeholder for thread-local global address
/// lowering.  It unconditionally hits llvm_unreachable with the message
/// "TLS not implemented for PPC." and uses neither Op nor DAG; the trailing
/// return only exists to give the function a return statement.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
|
2008-03-05 07:17:14 +08:00
|
|
|
SelectionDAG &DAG) {
|
2009-07-15 00:55:14 +08:00
|
|
|
llvm_unreachable("TLS not implemented for PPC.");
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(); // Not reached
|
2007-07-12 01:19:51 +08:00
|
|
|
}
|
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
/// LowerGlobalAddress - Lower a GlobalAddress node.  For the 64-bit SVR4 ABI
/// this produces a PPCISD::TOC_ENTRY node based on X2.  Otherwise the address
/// is assembled from PPCISD::Hi and PPCISD::Lo parts (plus the global base
/// register in PIC mode), and is additionally loaded through when the
/// subtarget reports that the global needs a lazy resolver stub.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GlobalNode = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GlobalNode->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GlobalNode->getOffset());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug info here
  DebugLoc dl = GlobalNode->getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue TOCReg = DAG.getRegister(PPC::X2, MVT::i64);
    return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA, TOCReg);
  }

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  const bool UseDirectAddress =
    TM.getRelocationModel() == Reloc::Static ||
    !TM.getSubtarget<PPCSubtarget>().isDarwin();

  // Non-pic code accesses globals directly: the address of the global is
  // just (hi(&g)+lo(&g)).
  if (UseDirectAddress)
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    SDValue GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg,
                                     DebugLoc::getUnknownLoc(), PtrVT);
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT, GlobalBase, Hi);
  }

  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  if (TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM))
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Addr, NULL, 0);

  return Addr;
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerSETCC - Custom lowering for SETCC.  Equality against constant zero is
/// rewritten as ctlz followed by a logical shift right; other integer
/// seteq/setne comparisons are rewritten as a compare of (lhs xor rhs)
/// against zero.  An empty SDValue is returned for every other case.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  DebugLoc dl = Op.getDebugLoc();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    const bool RHSIsZero = RHSC->isNullValue();

    // If we're comparing for equality to zero, expose the fact that this is
    // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
    // fold the new nodes.
    if (RHSIsZero && CC == ISD::SETEQ) {
      EVT VT = LHS.getValueType();
      SDValue Zext = LHS;
      // Operands narrower than i32 are zero-extended to i32 first.
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHS);
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue ShiftAmt = DAG.getConstant(Log2b, MVT::i32);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, ShiftAmt);
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }

    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (RHSC->isAllOnesValue() || RHSIsZero)
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = LHS.getValueType();
  const bool IsEqualityCC = (CC == ISD::SETEQ || CC == ISD::SETNE);
  if (!LHSVT.isInteger() || !IsEqualityCC)
    return SDValue();

  EVT VT = Op.getValueType();
  SDValue Xor = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
  return DAG.getSetCC(dl, VT, Xor, DAG.getConstant(0, LHSVT), CC);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerVAARG - Placeholder for VAARG lowering.  It unconditionally hits
/// llvm_unreachable with the message "VAARG not yet implemented for the SVR4
/// ABI!" and uses none of its parameters; the trailing return only exists to
/// give the function a return statement.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
|
2007-04-03 21:59:52 +08:00
|
|
|
int VarArgsFrameIndex,
|
|
|
|
int VarArgsStackOffset,
|
|
|
|
unsigned VarArgsNumGPR,
|
|
|
|
unsigned VarArgsNumFPR,
|
|
|
|
const PPCSubtarget &Subtarget) {
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-07-15 00:55:14 +08:00
|
|
|
llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(); // Not reached
|
2007-04-03 21:59:52 +08:00
|
|
|
}
|
|
|
|
|
2008-09-17 08:30:57 +08:00
|
|
|
/// LowerTRAMPOLINE - Lower a trampoline node into a call to the external
/// symbol __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg), and return the
/// two values of the call result merged into one node.
SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  DebugLoc dl = Op.getDebugLoc();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);
  const Type *IntPtrTy =
    DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
                                                             *DAG.getContext());

  // TrampSize == (isPPC64 ? 48 : 40);
  SDValue TrampSize = DAG.getConstant(isPPC64 ? 48 : 40,
                                      isPPC64 ? MVT::i64 : MVT::i32);

  // Every argument is passed with the pointer-sized integer type.
  SDValue CallArgs[] = { Trmp, TrampSize, FPtr, Nest };
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = IntPtrTy;
  for (unsigned i = 0; i != sizeof(CallArgs)/sizeof(CallArgs[0]); ++i) {
    Entry.Node = CallArgs[i];
    Args.push_back(Entry);
  }

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  SDValue Callee = DAG.getExternalSymbol("__trampoline_setup", PtrVT);
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
                false, false, false, false, 0, CallingConv::C, false,
                /*isReturnValueUsed=*/true,
                Callee, Args, DAG, dl);

  SDValue Ops[] = { CallResult.first, CallResult.second };
  return DAG.getMergeValues(Ops, 2, dl);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerVASTART - Lower a VASTART node.  For the Darwin ABI and for PPC64 a
/// single store writes the address of the VarArgsFrameIndex slot to the
/// va_list location.  Otherwise (32-bit SVR4) the four va_list fields are
/// written by a chain of four stores and the last store is returned.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {
  DebugLoc dl = Op.getDebugLoc();
  SDValue Chain = Op.getOperand(0);
  SDValue VAListPtr = Op.getOperand(1);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Chain, dl, FR, VAListPtr, SV, 0);
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue GPRCount = DAG.getConstant(VarArgsNumGPR, MVT::i32);
  SDValue FPRCount = DAG.getConstant(VarArgsNumFPR, MVT::i32);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue OverflowAreaFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDValue RegSaveAreaFI = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  // Distance from overflow_arg_area to reg_save_area: one pointer.
  const uint64_t PtrBytes = PtrVT.getSizeInBits()/8;
  SDValue PtrBytesNode = DAG.getConstant(PtrBytes, PtrVT);

  // Distance from the fpr byte to overflow_arg_area.
  const uint64_t FPRToOverflowBytes = PtrVT.getSizeInBits()/8 - 1;
  SDValue FPRToOverflowNode = DAG.getConstant(FPRToOverflowBytes, PtrVT);

  // The fpr byte directly follows the gpr byte.
  const uint64_t FPRByteOffset = 1;
  SDValue FPRByteOffsetNode = DAG.getConstant(FPRByteOffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue StoreGPR = DAG.getTruncStore(Chain, dl, GPRCount, VAListPtr, SV, 0,
                                       MVT::i8);
  uint64_t CurOffset = FPRByteOffset;
  SDValue CurPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               FPRByteOffsetNode);

  // Store second byte : number of float regs
  SDValue StoreFPR = DAG.getTruncStore(StoreGPR, dl, FPRCount, CurPtr, SV,
                                       CurOffset, MVT::i8);
  CurOffset += FPRToOverflowBytes;
  CurPtr = DAG.getNode(ISD::ADD, dl, PtrVT, CurPtr, FPRToOverflowNode);

  // Store second word : arguments given on stack
  SDValue StoreOverflow = DAG.getStore(StoreFPR, dl, OverflowAreaFI, CurPtr,
                                       SV, CurOffset);
  CurOffset += PtrBytes;
  CurPtr = DAG.getNode(ISD::ADD, dl, PtrVT, CurPtr, PtrBytesNode);

  // Store third word : arguments given in registers
  return DAG.getStore(StoreOverflow, dl, RegSaveAreaFI, CurPtr, SV, CurOffset);
}
|
|
|
|
|
2007-03-06 08:59:59 +08:00
|
|
|
#include "PPCGenCallingConv.inc"
|
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
/// CC_PPC_SVR4_Custom_Dummy - Custom calling-convention hook that ignores all
/// of its arguments, leaves State untouched and always returns true.
/// NOTE(review): presumably referenced from the generated
/// PPCGenCallingConv.inc tables -- confirm against the .td description.
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
|
2009-07-03 14:45:56 +08:00
|
|
|
CCValAssign::LocInfo &LocInfo,
|
|
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
|
|
CCState &State) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
|
|
|
|
EVT &LocVT,
|
2009-07-03 14:45:56 +08:00
|
|
|
CCValAssign::LocInfo &LocInfo,
|
|
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
|
|
CCState &State) {
|
|
|
|
static const unsigned ArgRegs[] = {
|
|
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
|
|
};
|
|
|
|
const unsigned NumArgRegs = array_lengthof(ArgRegs);
|
|
|
|
|
|
|
|
unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
|
|
|
|
|
|
|
|
// Skip one register if the first unallocated register has an even register
|
|
|
|
// number and there are still argument registers available which have not been
|
|
|
|
// allocated yet. RegNum is actually an index into ArgRegs, which means we
|
|
|
|
// need to skip a register if RegNum is odd.
|
|
|
|
if (RegNum != NumArgRegs && RegNum % 2 == 1) {
|
|
|
|
State.AllocateReg(ArgRegs[RegNum]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Always return false here, as this function only makes sure that the first
|
|
|
|
// unallocated register has an odd register number and does not actually
|
|
|
|
// allocate a register for the current argument.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
|
|
|
|
EVT &LocVT,
|
2009-07-03 14:45:56 +08:00
|
|
|
CCValAssign::LocInfo &LocInfo,
|
|
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
|
|
CCState &State) {
|
|
|
|
static const unsigned ArgRegs[] = {
|
|
|
|
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
|
|
|
|
PPC::F8
|
|
|
|
};
|
|
|
|
|
|
|
|
const unsigned NumArgRegs = array_lengthof(ArgRegs);
|
|
|
|
|
|
|
|
unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
|
|
|
|
|
|
|
|
// If there is only one Floating-point register left we need to put both f64
|
|
|
|
// values of a split ppc_fp128 value on the stack.
|
|
|
|
if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
|
|
|
|
State.AllocateReg(ArgRegs[RegNum]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Always return false here, as this function only makes sure that the two f64
|
|
|
|
// values a ppc_fp128 value is split into are both passed in registers or both
|
|
|
|
// passed on the stack and does not actually allocate a register for the
|
|
|
|
// current argument.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2007-02-25 13:34:32 +08:00
|
|
|
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
|
2009-08-15 19:54:46 +08:00
|
|
|
/// on Darwin.
|
|
|
|
static const unsigned *GetFPR() {
|
2007-02-25 13:34:32 +08:00
|
|
|
static const unsigned FPR[] = {
|
|
|
|
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
|
2009-08-15 19:54:46 +08:00
|
|
|
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
|
2007-02-25 13:34:32 +08:00
|
|
|
};
|
2009-08-15 19:54:46 +08:00
|
|
|
|
2007-02-25 13:34:32 +08:00
|
|
|
return FPR;
|
|
|
|
}
|
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
|
|
|
|
/// the stack.
|
2009-08-11 06:56:29 +08:00
|
|
|
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
|
2009-07-03 14:43:35 +08:00
|
|
|
unsigned PtrByteSize) {
|
|
|
|
unsigned ArgSize = ArgVT.getSizeInBits()/8;
|
2008-04-30 17:16:33 +08:00
|
|
|
if (Flags.isByVal())
|
|
|
|
ArgSize = Flags.getByValSize();
|
|
|
|
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
|
|
|
|
|
|
|
return ArgSize;
|
|
|
|
}
|
|
|
|
|
2009-07-03 14:45:56 +08:00
|
|
|
/// LowerFormalArguments - Dispatch incoming-argument lowering to the
/// ABI-specific implementation: the SVR4 variant for the 32-bit SVR4 ABI and
/// the Darwin variant for everything else.
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {
  const bool Is32BitSVR4 = PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64();

  if (Is32BitSVR4)
    return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
                                     dl, DAG, InVals);

  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                     dl, DAG, InVals);
}
|
|
|
|
|
|
|
|
SDValue
|
|
|
|
PPCTargetLowering::LowerFormalArguments_SVR4(
|
|
|
|
SDValue Chain,
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
const SmallVectorImpl<ISD::InputArg>
|
|
|
|
&Ins,
|
|
|
|
DebugLoc dl, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) {
|
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
// 32-bit SVR4 ABI Stack Frame Layout:
|
2009-07-03 14:45:56 +08:00
|
|
|
// +-----------------------------------+
|
|
|
|
// +--> | Back chain |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | Floating-point register save area |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | General register save area |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | CR save word |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | VRSAVE save word |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | Alignment padding |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | Vector register save area |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | Local variable space |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | Parameter list area |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// | | LR save word |
|
|
|
|
// | +-----------------------------------+
|
|
|
|
// SP--> +--- | Back chain |
|
|
|
|
// +-----------------------------------+
|
|
|
|
//
|
|
|
|
// Specifications:
|
|
|
|
// System V Application Binary Interface PowerPC Processor Supplement
|
|
|
|
// AltiVec Technology Programming Interface Manual
|
|
|
|
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2009-07-03 14:45:56 +08:00
|
|
|
// Potential tail calls could cause overwriting of argument stack slots.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast));
|
2009-07-03 14:45:56 +08:00
|
|
|
unsigned PtrByteSize = 4;
|
|
|
|
|
|
|
|
// Assign locations to all of the incoming arguments.
|
|
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
|
|
|
|
*DAG.getContext());
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// Reserve space for the linkage area on the stack.
|
|
|
|
CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
|
|
|
CCValAssign &VA = ArgLocs[i];
|
|
|
|
|
|
|
|
// Arguments stored in registers.
|
|
|
|
if (VA.isRegLoc()) {
|
|
|
|
TargetRegisterClass *RC;
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ValVT = VA.getValVT();
|
2009-07-03 14:45:56 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
switch (ValVT.getSimpleVT().SimpleTy) {
|
2009-07-03 14:45:56 +08:00
|
|
|
default:
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
llvm_unreachable("ValVT not supported by formal arguments Lowering");
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i32:
|
2009-07-03 14:45:56 +08:00
|
|
|
RC = PPC::GPRCRegisterClass;
|
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::f32:
|
2009-07-03 14:45:56 +08:00
|
|
|
RC = PPC::F4RCRegisterClass;
|
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::f64:
|
2009-07-03 14:45:56 +08:00
|
|
|
RC = PPC::F8RCRegisterClass;
|
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::v16i8:
|
|
|
|
case MVT::v8i16:
|
|
|
|
case MVT::v4i32:
|
|
|
|
case MVT::v4f32:
|
2009-07-03 14:45:56 +08:00
|
|
|
RC = PPC::VRRCRegisterClass;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Transform the arguments stored in physical registers into virtual ones.
|
|
|
|
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
InVals.push_back(ArgValue);
|
2009-07-03 14:45:56 +08:00
|
|
|
} else {
|
|
|
|
// Argument stored in memory.
|
|
|
|
assert(VA.isMemLoc());
|
|
|
|
|
|
|
|
unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
|
|
|
|
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
|
|
|
|
isImmutable);
|
|
|
|
|
|
|
|
// Create load nodes to retrieve arguments from the stack.
|
|
|
|
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
|
2009-07-03 14:45:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assign locations to all of the incoming aggregate by value arguments.
|
|
|
|
// Aggregates passed by value are stored in the local variable space of the
|
|
|
|
// caller's stack frame, right above the parameter list area.
|
|
|
|
SmallVector<CCValAssign, 16> ByValArgLocs;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(),
|
2009-07-22 08:24:57 +08:00
|
|
|
ByValArgLocs, *DAG.getContext());
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// Reserve stack space for the allocations in CCInfo.
|
|
|
|
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// Area that is at least reserved in the caller of this function.
|
|
|
|
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
|
|
|
|
|
|
|
|
// Set the size that is at least reserved in caller of this function. Tail
|
|
|
|
// call optimized function's reserved stack space needs to be aligned so that
|
|
|
|
// taking the difference between two stack areas will result in an aligned
|
|
|
|
// stack.
|
|
|
|
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
|
|
|
|
|
|
|
MinReservedArea =
|
|
|
|
std::max(MinReservedArea,
|
|
|
|
PPCFrameInfo::getMinCallFrameSize(false, false));
|
|
|
|
|
|
|
|
unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
|
|
|
|
getStackAlignment();
|
|
|
|
unsigned AlignMask = TargetAlign-1;
|
|
|
|
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
|
|
|
|
|
|
|
|
FI->setMinReservedArea(MinReservedArea);
|
|
|
|
|
|
|
|
SmallVector<SDValue, 8> MemOps;
|
|
|
|
|
|
|
|
// If the function takes variable number of arguments, make a frame index for
|
|
|
|
// the start of the first vararg value... for expansion of llvm.va_start.
|
|
|
|
if (isVarArg) {
|
|
|
|
static const unsigned GPArgRegs[] = {
|
|
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
|
|
};
|
|
|
|
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
|
|
|
|
|
|
|
|
static const unsigned FPArgRegs[] = {
|
|
|
|
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
|
|
|
|
PPC::F8
|
|
|
|
};
|
|
|
|
const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
|
|
|
|
|
|
|
|
VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs);
|
|
|
|
VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs);
|
|
|
|
|
|
|
|
// Make room for NumGPArgRegs and NumFPArgRegs.
|
|
|
|
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
|
2009-08-12 04:47:22 +08:00
|
|
|
NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
|
|
|
|
CCInfo.getNextStackOffset());
|
|
|
|
|
|
|
|
VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8);
|
|
|
|
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
|
|
|
|
|
|
|
|
// The fixed integer arguments of a variadic function are
|
|
|
|
// stored to the VarArgsFrameIndex on the stack.
|
|
|
|
unsigned GPRIndex = 0;
|
|
|
|
for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) {
|
|
|
|
SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0);
|
2009-07-03 14:45:56 +08:00
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by four for the next argument to store
|
|
|
|
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
|
|
|
|
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this function is vararg, store any remaining integer argument regs
|
|
|
|
// to their spots on the stack so that they may be loaded by dereferencing the
|
|
|
|
// result of va_next.
|
|
|
|
for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
|
|
|
|
unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
2009-07-03 14:45:56 +08:00
|
|
|
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
|
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by four for the next argument to store
|
|
|
|
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
|
|
|
|
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
|
|
|
|
}
|
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
// FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
|
|
|
|
// is set.
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// The double arguments are stored to the VarArgsFrameIndex
|
|
|
|
// on the stack.
|
|
|
|
unsigned FPRIndex = 0;
|
|
|
|
for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0);
|
2009-07-03 14:45:56 +08:00
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by eight for the next argument to store
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
|
2009-07-03 14:45:56 +08:00
|
|
|
PtrVT);
|
|
|
|
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
|
|
|
|
unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
|
2009-07-03 14:45:56 +08:00
|
|
|
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
|
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by eight for the next argument to store
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
|
2009-07-03 14:45:56 +08:00
|
|
|
PtrVT);
|
|
|
|
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!MemOps.empty())
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, dl,
|
2009-08-12 04:47:22 +08:00
|
|
|
MVT::Other, &MemOps[0], MemOps.size());
|
2009-07-03 14:45:56 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
return Chain;
|
2009-07-03 14:45:56 +08:00
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
PPCTargetLowering::LowerFormalArguments_Darwin(
|
|
|
|
SDValue Chain,
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
const SmallVectorImpl<ISD::InputArg>
|
|
|
|
&Ins,
|
|
|
|
DebugLoc dl, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) {
|
2006-05-17 02:18:50 +08:00
|
|
|
// TODO: add description of PPC stack frame format, or at least some docs.
|
|
|
|
//
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2009-08-12 04:47:22 +08:00
|
|
|
bool isPPC64 = PtrVT == MVT::i64;
|
2008-04-30 17:16:33 +08:00
|
|
|
// Potential tail calls could cause overwriting of argument stack slots.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast));
|
2006-11-28 22:53:52 +08:00
|
|
|
unsigned PtrByteSize = isPPC64 ? 8 : 4;
|
2006-11-17 06:43:37 +08:00
|
|
|
|
2009-07-03 14:47:08 +08:00
|
|
|
unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
|
2008-04-30 17:16:33 +08:00
|
|
|
// Area that is at least reserved in caller of this function.
|
|
|
|
unsigned MinReservedArea = ArgOffset;
|
|
|
|
|
2006-06-27 06:48:35 +08:00
|
|
|
static const unsigned GPR_32[] = { // 32-bit registers.
|
2006-05-17 02:18:50 +08:00
|
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
|
|
};
|
2006-06-27 06:48:35 +08:00
|
|
|
static const unsigned GPR_64[] = { // 64-bit registers.
|
|
|
|
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
|
|
|
|
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
|
|
|
|
};
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
static const unsigned *FPR = GetFPR();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-05-17 02:18:50 +08:00
|
|
|
static const unsigned VR[] = {
|
|
|
|
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
|
|
|
|
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
|
|
|
|
};
|
2006-06-27 06:48:35 +08:00
|
|
|
|
2007-09-07 12:06:50 +08:00
|
|
|
const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
|
2009-07-03 14:47:08 +08:00
|
|
|
const unsigned Num_FPR_Regs = 13;
|
2007-09-07 12:06:50 +08:00
|
|
|
const unsigned Num_VR_Regs = array_lengthof( VR);
|
2006-11-17 06:43:37 +08:00
|
|
|
|
|
|
|
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-06-27 06:48:35 +08:00
|
|
|
const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-03-15 01:41:26 +08:00
|
|
|
// In 32-bit non-varargs functions, the stack space for vectors is after the
|
|
|
|
// stack space for non-vectors. We do not use this space unless we have
|
|
|
|
// too many vectors to fit in registers, something that only occurs in
|
2009-02-18 06:15:04 +08:00
|
|
|
// constructed examples:), but we have to walk the arglist to figure
|
2008-03-15 01:41:26 +08:00
|
|
|
// that out...for the pathological case, compute VecArgOffset as the
|
|
|
|
// start of the vector parameter area. Computing VecArgOffset is the
|
|
|
|
// entire point of the following loop.
|
|
|
|
unsigned VecArgOffset = ArgOffset;
|
|
|
|
if (!isVarArg && !isPPC64) {
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
|
2008-03-15 01:41:26 +08:00
|
|
|
++ArgNo) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ObjectVT = Ins[ArgNo].VT;
|
2008-06-06 20:08:01 +08:00
|
|
|
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
|
2008-03-15 01:41:26 +08:00
|
|
|
|
2008-03-21 17:14:45 +08:00
|
|
|
if (Flags.isByVal()) {
|
2008-03-15 01:41:26 +08:00
|
|
|
// ObjSize is the true size, ArgSize rounded up to multiple of regs.
|
2008-03-21 17:14:45 +08:00
|
|
|
ObjSize = Flags.getByValSize();
|
2009-02-18 06:15:04 +08:00
|
|
|
unsigned ArgSize =
|
2008-03-15 01:41:26 +08:00
|
|
|
((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
|
|
|
VecArgOffset += ArgSize;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
switch(ObjectVT.getSimpleVT().SimpleTy) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unhandled argument type!");
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i32:
|
|
|
|
case MVT::f32:
|
2008-03-15 01:41:26 +08:00
|
|
|
VecArgOffset += isPPC64 ? 8 : 4;
|
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i64: // PPC64
|
|
|
|
case MVT::f64:
|
2008-03-15 01:41:26 +08:00
|
|
|
VecArgOffset += 8;
|
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::v4f32:
|
|
|
|
case MVT::v4i32:
|
|
|
|
case MVT::v8i16:
|
|
|
|
case MVT::v16i8:
|
2008-03-15 01:41:26 +08:00
|
|
|
// Nothing to do, we're only looking at Nonvector args here.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// We've found where the vector parameter area in memory is. Skip the
|
|
|
|
// first 12 parameters; these don't use that memory.
|
|
|
|
VecArgOffset = ((VecArgOffset+15)/16)*16;
|
|
|
|
VecArgOffset += 12*16;
|
|
|
|
|
2006-05-17 02:18:50 +08:00
|
|
|
// Add DAG nodes to load the arguments or copy them out of registers. On
|
2006-11-17 06:43:37 +08:00
|
|
|
// entry to a function on PPC, the arguments start after the linkage area,
|
|
|
|
// although the first ones are often in registers.
|
2007-03-13 23:02:46 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SmallVector<SDValue, 8> MemOps;
|
2008-04-30 17:16:33 +08:00
|
|
|
unsigned nAltivecParamsAtEnd = 0;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue ArgVal;
|
2006-05-17 02:18:50 +08:00
|
|
|
bool needsLoad = false;
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ObjectVT = Ins[ArgNo].VT;
|
2008-06-06 20:08:01 +08:00
|
|
|
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
|
2006-11-29 21:37:09 +08:00
|
|
|
unsigned ArgSize = ObjSize;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
|
2006-05-17 02:18:50 +08:00
|
|
|
|
2006-05-17 02:51:52 +08:00
|
|
|
unsigned CurArgOffset = ArgOffset;
|
2008-03-08 04:27:40 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
|
|
|
|
ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
|
2008-04-30 17:16:33 +08:00
|
|
|
if (isVarArg || isPPC64) {
|
|
|
|
MinReservedArea = ((MinReservedArea+15)/16)*16;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
MinReservedArea += CalculateStackSlotSize(ObjectVT,
|
2008-09-13 09:54:27 +08:00
|
|
|
Flags,
|
2008-04-30 17:16:33 +08:00
|
|
|
PtrByteSize);
|
|
|
|
} else nAltivecParamsAtEnd++;
|
|
|
|
} else
|
|
|
|
// Calculate min reserved area.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
|
2008-09-13 09:54:27 +08:00
|
|
|
Flags,
|
2008-04-30 17:16:33 +08:00
|
|
|
PtrByteSize);
|
|
|
|
|
2008-03-08 04:27:40 +08:00
|
|
|
// FIXME the codegen can be much improved in some cases.
|
|
|
|
// We do not have to keep everything in memory.
|
2008-03-21 17:14:45 +08:00
|
|
|
if (Flags.isByVal()) {
|
2008-03-08 04:27:40 +08:00
|
|
|
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
|
2008-03-21 17:14:45 +08:00
|
|
|
ObjSize = Flags.getByValSize();
|
2008-03-08 04:27:40 +08:00
|
|
|
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
2008-03-08 09:41:42 +08:00
|
|
|
// Objects of size 1 and 2 are right justified, everything else is
|
|
|
|
// left justified. This means the memory address is adjusted forwards.
|
|
|
|
if (ObjSize==1 || ObjSize==2) {
|
|
|
|
CurArgOffset = CurArgOffset + (4 - ObjSize);
|
|
|
|
}
|
2008-03-08 04:27:40 +08:00
|
|
|
// The value of the object is its address.
|
|
|
|
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
InVals.push_back(FIN);
|
2008-03-08 09:41:42 +08:00
|
|
|
if (ObjSize==1 || ObjSize==2) {
|
|
|
|
if (GPR_idx != Num_GPR_Regs) {
|
2009-07-03 14:43:35 +08:00
|
|
|
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
2009-02-18 06:15:04 +08:00
|
|
|
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
|
2009-08-12 04:47:22 +08:00
|
|
|
NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
|
2008-03-08 09:41:42 +08:00
|
|
|
MemOps.push_back(Store);
|
|
|
|
++GPR_idx;
|
|
|
|
}
|
2009-07-03 14:47:08 +08:00
|
|
|
|
|
|
|
ArgOffset += PtrByteSize;
|
|
|
|
|
2008-03-08 09:41:42 +08:00
|
|
|
continue;
|
|
|
|
}
|
2008-03-08 04:27:40 +08:00
|
|
|
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
|
|
|
|
// Store whatever pieces of the object are in registers
|
|
|
|
// to memory. ArgVal will be address of the beginning of
|
|
|
|
// the object.
|
|
|
|
if (GPR_idx != Num_GPR_Regs) {
|
2009-07-03 14:43:35 +08:00
|
|
|
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
|
2008-03-08 04:27:40 +08:00
|
|
|
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
|
2008-03-08 04:27:40 +08:00
|
|
|
MemOps.push_back(Store);
|
|
|
|
++GPR_idx;
|
2009-07-03 14:47:08 +08:00
|
|
|
ArgOffset += PtrByteSize;
|
2008-03-08 04:27:40 +08:00
|
|
|
} else {
|
|
|
|
ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
switch (ObjectVT.getSimpleVT().SimpleTy) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unhandled argument type!");
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i32:
|
2008-03-08 04:49:02 +08:00
|
|
|
if (!isPPC64) {
|
|
|
|
if (GPR_idx != Num_GPR_Regs) {
|
2009-07-03 14:43:35 +08:00
|
|
|
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
|
2009-08-12 04:47:22 +08:00
|
|
|
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
|
2008-03-08 04:49:02 +08:00
|
|
|
++GPR_idx;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
ArgSize = PtrByteSize;
|
|
|
|
}
|
2009-07-03 14:47:08 +08:00
|
|
|
// All int arguments reserve stack space in the Darwin ABI.
|
|
|
|
ArgOffset += PtrByteSize;
|
2008-03-08 04:49:02 +08:00
|
|
|
break;
|
2006-05-17 02:18:50 +08:00
|
|
|
}
|
2008-03-08 04:49:02 +08:00
|
|
|
// FALLTHROUGH
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i64: // PPC64
|
2006-06-27 06:48:35 +08:00
|
|
|
if (GPR_idx != Num_GPR_Regs) {
|
2009-07-03 14:43:35 +08:00
|
|
|
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
|
2009-08-12 04:47:22 +08:00
|
|
|
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
|
2008-03-08 04:49:02 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
if (ObjectVT == MVT::i32) {
|
2008-03-08 04:49:02 +08:00
|
|
|
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
|
2009-08-12 04:47:22 +08:00
|
|
|
// value to MVT::i64 and then truncate to the correct register size.
|
2008-03-21 17:14:45 +08:00
|
|
|
if (Flags.isSExt())
|
2009-08-12 04:47:22 +08:00
|
|
|
ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
|
2008-03-08 04:49:02 +08:00
|
|
|
DAG.getValueType(ObjectVT));
|
2008-03-21 17:14:45 +08:00
|
|
|
else if (Flags.isZExt())
|
2009-08-12 04:47:22 +08:00
|
|
|
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
|
2008-03-08 04:49:02 +08:00
|
|
|
DAG.getValueType(ObjectVT));
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
|
2008-03-08 04:49:02 +08:00
|
|
|
}
|
|
|
|
|
2006-06-27 06:48:35 +08:00
|
|
|
++GPR_idx;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
2008-07-24 16:17:07 +08:00
|
|
|
ArgSize = PtrByteSize;
|
2006-06-27 06:48:35 +08:00
|
|
|
}
|
2009-07-03 14:47:08 +08:00
|
|
|
// All int arguments reserve stack space in the Darwin ABI.
|
|
|
|
ArgOffset += 8;
|
2006-06-27 06:48:35 +08:00
|
|
|
break;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
2006-05-17 02:51:52 +08:00
|
|
|
// Every 4 bytes of argument space consumes one of the GPRs available for
|
|
|
|
// argument passing.
|
2009-07-03 14:47:08 +08:00
|
|
|
if (GPR_idx != Num_GPR_Regs) {
|
2006-05-17 02:58:15 +08:00
|
|
|
++GPR_idx;
|
2006-11-18 09:57:19 +08:00
|
|
|
if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
|
2006-05-17 02:58:15 +08:00
|
|
|
++GPR_idx;
|
2006-05-17 02:51:52 +08:00
|
|
|
}
|
2006-05-17 02:58:15 +08:00
|
|
|
if (FPR_idx != Num_FPR_Regs) {
|
2006-05-17 02:18:50 +08:00
|
|
|
unsigned VReg;
|
2009-07-03 14:43:35 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
if (ObjectVT == MVT::f32)
|
2009-07-03 14:43:35 +08:00
|
|
|
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
|
2006-05-17 02:18:50 +08:00
|
|
|
else
|
2009-07-03 14:43:35 +08:00
|
|
|
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
|
2006-05-17 02:18:50 +08:00
|
|
|
++FPR_idx;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-07-03 14:47:08 +08:00
|
|
|
// All FP arguments reserve stack space in the Darwin ABI.
|
|
|
|
ArgOffset += isPPC64 ? 8 : ObjSize;
|
2006-05-17 02:18:50 +08:00
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::v4f32:
|
|
|
|
case MVT::v4i32:
|
|
|
|
case MVT::v8i16:
|
|
|
|
case MVT::v16i8:
|
2008-03-12 08:22:17 +08:00
|
|
|
// Note that vector arguments in registers don't reserve stack space,
|
|
|
|
// except in varargs functions.
|
2006-05-17 02:58:15 +08:00
|
|
|
if (VR_idx != Num_VR_Regs) {
|
2009-07-03 14:43:35 +08:00
|
|
|
unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
|
2008-03-12 08:22:17 +08:00
|
|
|
if (isVarArg) {
|
|
|
|
while ((ArgOffset % 16) != 0) {
|
|
|
|
ArgOffset += PtrByteSize;
|
|
|
|
if (GPR_idx != Num_GPR_Regs)
|
|
|
|
GPR_idx++;
|
|
|
|
}
|
|
|
|
ArgOffset += 16;
|
2009-08-15 19:54:46 +08:00
|
|
|
GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
|
2008-03-12 08:22:17 +08:00
|
|
|
}
|
2006-05-17 02:18:50 +08:00
|
|
|
++VR_idx;
|
|
|
|
} else {
|
2008-03-15 01:41:26 +08:00
|
|
|
if (!isVarArg && !isPPC64) {
|
|
|
|
// Vectors go after all the nonvectors.
|
|
|
|
CurArgOffset = VecArgOffset;
|
|
|
|
VecArgOffset += 16;
|
|
|
|
} else {
|
|
|
|
// Vectors are aligned.
|
|
|
|
ArgOffset = ((ArgOffset+15)/16)*16;
|
|
|
|
CurArgOffset = ArgOffset;
|
|
|
|
ArgOffset += 16;
|
2008-03-12 08:49:20 +08:00
|
|
|
}
|
2006-05-17 02:18:50 +08:00
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-05-17 02:18:50 +08:00
|
|
|
// We need to load the argument to a virtual register if we determined above
|
2008-02-13 15:35:30 +08:00
|
|
|
// that we ran out of physical registers of the appropriate type.
|
2006-05-17 02:18:50 +08:00
|
|
|
if (needsLoad) {
|
2008-02-13 15:35:30 +08:00
|
|
|
int FI = MFI->CreateFixedObject(ObjSize,
|
2008-04-30 17:16:33 +08:00
|
|
|
CurArgOffset + (ArgSize - ObjSize),
|
|
|
|
isImmutable);
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
|
2006-05-17 02:18:50 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
InVals.push_back(ArgVal);
|
2006-05-17 02:18:50 +08:00
|
|
|
}
|
2008-03-08 04:27:40 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
// Set the size that is at least reserved in caller of this function. Tail
|
|
|
|
// call optimized function's reserved stack space needs to be aligned so that
|
|
|
|
// taking the difference between two stack areas will result in an aligned
|
|
|
|
// stack.
|
|
|
|
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
|
|
|
// Add the Altivec parameters at the end, if needed.
|
|
|
|
if (nAltivecParamsAtEnd) {
|
|
|
|
MinReservedArea = ((MinReservedArea+15)/16)*16;
|
|
|
|
MinReservedArea += 16*nAltivecParamsAtEnd;
|
|
|
|
}
|
|
|
|
MinReservedArea =
|
|
|
|
std::max(MinReservedArea,
|
2009-07-03 14:47:08 +08:00
|
|
|
PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
|
2008-04-30 17:16:33 +08:00
|
|
|
unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
|
|
|
|
getStackAlignment();
|
|
|
|
unsigned AlignMask = TargetAlign-1;
|
|
|
|
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
|
|
|
|
FI->setMinReservedArea(MinReservedArea);
|
|
|
|
|
2006-05-17 02:18:50 +08:00
|
|
|
// If the function takes variable number of arguments, make a frame index for
|
|
|
|
// the start of the first vararg value... for expansion of llvm.va_start.
|
|
|
|
if (isVarArg) {
|
2009-07-03 14:47:08 +08:00
|
|
|
int Depth = ArgOffset;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-06-06 20:08:01 +08:00
|
|
|
VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
|
2009-07-03 14:47:08 +08:00
|
|
|
Depth);
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-05-17 02:18:50 +08:00
|
|
|
// If this function is vararg, store any remaining integer argument regs
|
|
|
|
// to their spots on the stack so that they may be loaded by dereferencing the
|
|
|
|
// result of va_next.
|
2006-05-17 02:58:15 +08:00
|
|
|
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
|
2006-11-18 09:57:19 +08:00
|
|
|
unsigned VReg;
|
2009-07-03 14:43:35 +08:00
|
|
|
|
2006-11-18 09:57:19 +08:00
|
|
|
if (isPPC64)
|
2009-07-03 14:43:35 +08:00
|
|
|
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
|
2006-11-18 09:57:19 +08:00
|
|
|
else
|
2009-07-03 14:43:35 +08:00
|
|
|
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
|
2006-11-18 09:57:19 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
|
2006-05-17 02:18:50 +08:00
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by the pointer size for the next argument to store
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
|
2009-02-04 10:34:38 +08:00
|
|
|
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
|
2006-05-17 02:18:50 +08:00
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-03-08 04:27:40 +08:00
|
|
|
if (!MemOps.empty())
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, dl,
|
2009-08-12 04:47:22 +08:00
|
|
|
MVT::Other, &MemOps[0], MemOps.size());
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
return Chain;
|
2006-05-17 02:18:50 +08:00
|
|
|
}
|
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
|
2009-07-03 14:47:08 +08:00
|
|
|
/// linkage area for the Darwin ABI.
|
2008-04-30 17:16:33 +08:00
|
|
|
static unsigned
|
|
|
|
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
|
|
|
|
bool isPPC64,
|
|
|
|
bool isVarArg,
|
|
|
|
unsigned CC,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
const SmallVectorImpl<ISD::OutputArg>
|
|
|
|
&Outs,
|
2008-04-30 17:16:33 +08:00
|
|
|
unsigned &nAltivecParamsAtEnd) {
|
|
|
|
// Count how many bytes are to be pushed on the stack, including the linkage
|
|
|
|
// area, and parameter passing area. We start with 24/48 bytes, which is
|
|
|
|
// prereserved space for [SP][CR][LR][3 x unused].
|
2009-07-03 14:47:08 +08:00
|
|
|
unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
unsigned NumOps = Outs.size();
|
2008-04-30 17:16:33 +08:00
|
|
|
unsigned PtrByteSize = isPPC64 ? 8 : 4;
|
|
|
|
|
|
|
|
// Add up all the space actually used.
|
|
|
|
// In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
|
|
|
|
// they all go in registers, but we must reserve stack space for them for
|
|
|
|
// possible use by the caller. In varargs or 64-bit calls, parameters are
|
|
|
|
// assigned stack space in order, with padding so Altivec parameters are
|
|
|
|
// 16-byte aligned.
|
|
|
|
nAltivecParamsAtEnd = 0;
|
|
|
|
for (unsigned i = 0; i != NumOps; ++i) {
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Arg = Outs[i].Val;
|
|
|
|
ISD::ArgFlagsTy Flags = Outs[i].Flags;
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ArgVT = Arg.getValueType();
|
2008-04-30 17:16:33 +08:00
|
|
|
// Varargs Altivec parameters are padded to a 16 byte boundary.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
|
|
|
|
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
|
2008-04-30 17:16:33 +08:00
|
|
|
if (!isVarArg && !isPPC64) {
|
|
|
|
// Non-varargs Altivec parameters go after all the non-Altivec
|
|
|
|
// parameters; handle those later so we know how much padding we need.
|
|
|
|
nAltivecParamsAtEnd++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
|
|
|
|
NumBytes = ((NumBytes+15)/16)*16;
|
|
|
|
}
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
|
2008-04-30 17:16:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Allow for Altivec parameters at the end, if needed.
|
|
|
|
if (nAltivecParamsAtEnd) {
|
|
|
|
NumBytes = ((NumBytes+15)/16)*16;
|
|
|
|
NumBytes += 16*nAltivecParamsAtEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The prolog code of the callee may store up to 8 GPR argument registers to
|
|
|
|
// the stack, allowing va_start to index over them in memory if its varargs.
|
|
|
|
// Because we cannot tell if this is needed on the caller side, we have to
|
|
|
|
// conservatively assume that it is needed. As such, make sure we have at
|
|
|
|
// least enough stack space for the caller to store the 8 GPRs.
|
|
|
|
NumBytes = std::max(NumBytes,
|
2009-07-03 14:47:08 +08:00
|
|
|
PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
|
2008-04-30 17:16:33 +08:00
|
|
|
|
|
|
|
// Tail call needs the stack to be aligned.
|
|
|
|
if (CC==CallingConv::Fast && PerformTailCallOpt) {
|
|
|
|
unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
|
|
|
|
getStackAlignment();
|
|
|
|
unsigned AlignMask = TargetAlign-1;
|
|
|
|
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NumBytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
|
|
|
|
/// adjusted to accomodate the arguments for the tailcall.
|
|
|
|
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
|
|
|
|
unsigned ParamSize) {
|
|
|
|
|
|
|
|
if (!IsTailCall) return 0;
|
|
|
|
|
|
|
|
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
|
|
|
|
unsigned CallerMinReservedArea = FI->getMinReservedArea();
|
|
|
|
int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
|
|
|
|
// Remember only if the new adjustement is bigger.
|
|
|
|
if (SPDiff < FI->getTailCallSPDelta())
|
|
|
|
FI->setTailCallSPDelta(SPDiff);
|
|
|
|
|
|
|
|
return SPDiff;
|
|
|
|
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
|
|
|
/// for tail call optimization. Targets which want to do tail call
|
|
|
|
/// optimization should implement this function.
|
2008-04-30 17:16:33 +08:00
|
|
|
bool
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CalleeCC,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
2008-04-30 17:16:33 +08:00
|
|
|
SelectionDAG& DAG) const {
|
|
|
|
// Variable argument functions are not supported.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
if (isVarArg)
|
2008-09-13 00:56:44 +08:00
|
|
|
return false;
|
2008-04-30 17:16:33 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
|
|
|
|
// Functions containing by val parameters are not supported.
|
|
|
|
for (unsigned i = 0; i != Ins.size(); i++) {
|
|
|
|
ISD::ArgFlagsTy Flags = Ins[i].Flags;
|
|
|
|
if (Flags.isByVal()) return false;
|
|
|
|
}
|
2008-04-30 17:16:33 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
// Non PIC/GOT tail calls are supported.
|
|
|
|
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
|
|
|
|
return true;
|
2008-04-30 17:16:33 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
// At the moment we can only do local tail calls (in same module, hidden
|
|
|
|
// or protected) if we are generating PIC.
|
|
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
|
|
|
|
return G->getGlobal()->hasHiddenVisibility()
|
|
|
|
|| G->getGlobal()->hasProtectedVisibility();
|
2008-04-30 17:16:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-05-18 03:00:46 +08:00
|
|
|
/// isCallCompatibleAddress - Return the immediate to use if the specified
|
|
|
|
/// 32-bit value is representable in the immediate field of a BxA instruction.
|
2008-07-28 05:46:04 +08:00
|
|
|
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
|
2006-05-18 03:00:46 +08:00
|
|
|
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
|
|
|
|
if (!C) return 0;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-09-13 00:56:44 +08:00
|
|
|
int Addr = C->getZExtValue();
|
2006-05-18 03:00:46 +08:00
|
|
|
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
|
|
|
|
(Addr << 6 >> 6) != Addr)
|
|
|
|
return 0; // Top 6 bits have to be sext of immediate.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-09-13 00:56:44 +08:00
|
|
|
return DAG.getConstant((int)C->getZExtValue() >> 2,
|
2008-08-29 05:40:38 +08:00
|
|
|
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
|
2006-05-18 03:00:46 +08:00
|
|
|
}
|
|
|
|
|
2008-05-13 08:00:25 +08:00
|
|
|
namespace {
|
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
struct TailCallArgumentInfo {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Arg;
|
|
|
|
SDValue FrameIdxOp;
|
2008-04-30 17:16:33 +08:00
|
|
|
int FrameIdx;
|
|
|
|
|
|
|
|
TailCallArgumentInfo() : FrameIdx(0) {}
|
|
|
|
};
|
|
|
|
|
2008-05-13 08:00:25 +08:00
|
|
|
}
|
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
|
|
|
|
static void
|
|
|
|
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Chain,
|
2008-04-30 17:16:33 +08:00
|
|
|
const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
|
2009-02-05 04:06:27 +08:00
|
|
|
SmallVector<SDValue, 8> &MemOpChains,
|
|
|
|
DebugLoc dl) {
|
2008-04-30 17:16:33 +08:00
|
|
|
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Arg = TailCallArgs[i].Arg;
|
|
|
|
SDValue FIN = TailCallArgs[i].FrameIdxOp;
|
2008-04-30 17:16:33 +08:00
|
|
|
int FI = TailCallArgs[i].FrameIdx;
|
|
|
|
// Store relative to framepointer.
|
2009-02-05 04:06:27 +08:00
|
|
|
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
|
2008-07-12 06:44:52 +08:00
|
|
|
PseudoSourceValue::getFixedStack(FI),
|
|
|
|
0));
|
2008-04-30 17:16:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
|
|
|
|
/// the appropriate stack slot for the tail call optimized function call.
|
2008-07-28 05:46:04 +08:00
|
|
|
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
|
2008-04-30 17:16:33 +08:00
|
|
|
MachineFunction &MF,
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Chain,
|
|
|
|
SDValue OldRetAddr,
|
|
|
|
SDValue OldFP,
|
2008-04-30 17:16:33 +08:00
|
|
|
int SPDiff,
|
|
|
|
bool isPPC64,
|
2009-07-03 14:47:08 +08:00
|
|
|
bool isDarwinABI,
|
2009-02-05 04:06:27 +08:00
|
|
|
DebugLoc dl) {
|
2008-04-30 17:16:33 +08:00
|
|
|
if (SPDiff) {
|
|
|
|
// Calculate the new stack slot for the return address.
|
|
|
|
int SlotSize = isPPC64 ? 8 : 4;
|
|
|
|
int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
|
2009-07-03 14:47:08 +08:00
|
|
|
isDarwinABI);
|
2008-04-30 17:16:33 +08:00
|
|
|
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
|
|
|
|
NewRetAddrLoc);
|
2009-08-12 04:47:22 +08:00
|
|
|
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
|
2009-02-05 04:06:27 +08:00
|
|
|
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
|
2008-07-12 06:44:52 +08:00
|
|
|
PseudoSourceValue::getFixedStack(NewRetAddr), 0);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
|
|
|
|
// slot as the FP is never overwritten.
|
2009-07-03 14:47:08 +08:00
|
|
|
if (isDarwinABI) {
|
2009-07-03 14:45:56 +08:00
|
|
|
int NewFPLoc =
|
2009-07-03 14:47:08 +08:00
|
|
|
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
|
2009-07-03 14:45:56 +08:00
|
|
|
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
|
|
|
|
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
|
|
|
|
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
|
|
|
|
PseudoSourceValue::getFixedStack(NewFPIdx), 0);
|
|
|
|
}
|
2008-04-30 17:16:33 +08:00
|
|
|
}
|
|
|
|
return Chain;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
|
|
|
|
/// the position of the argument.
|
|
|
|
static void
|
|
|
|
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Arg, int SPDiff, unsigned ArgOffset,
|
2008-04-30 17:16:33 +08:00
|
|
|
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
|
|
|
|
int Offset = ArgOffset + SPDiff;
|
2008-06-06 20:08:01 +08:00
|
|
|
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
|
2008-04-30 17:16:33 +08:00
|
|
|
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
2009-08-12 04:47:22 +08:00
|
|
|
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue FIN = DAG.getFrameIndex(FI, VT);
|
2008-04-30 17:16:33 +08:00
|
|
|
TailCallArgumentInfo Info;
|
|
|
|
Info.Arg = Arg;
|
|
|
|
Info.FrameIdxOp = FIN;
|
|
|
|
Info.FrameIdx = FI;
|
|
|
|
TailCallArguments.push_back(Info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
|
|
|
|
/// stack slot. Returns the chain as result and the loaded frame pointers in
|
|
|
|
/// LROpOut/FPOpout. Used when tail calling.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
|
2009-02-05 04:06:27 +08:00
|
|
|
int SPDiff,
|
|
|
|
SDValue Chain,
|
|
|
|
SDValue &LROpOut,
|
|
|
|
SDValue &FPOpOut,
|
2009-07-03 14:47:08 +08:00
|
|
|
bool isDarwinABI,
|
2009-02-05 04:06:27 +08:00
|
|
|
DebugLoc dl) {
|
2008-04-30 17:16:33 +08:00
|
|
|
if (SPDiff) {
|
|
|
|
// Load the LR and FP stack slot for later adjusting.
|
2009-08-12 04:47:22 +08:00
|
|
|
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
|
2008-04-30 17:16:33 +08:00
|
|
|
LROpOut = getReturnAddrFrameIndex(DAG);
|
2009-02-05 04:06:27 +08:00
|
|
|
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
|
2008-08-29 05:40:38 +08:00
|
|
|
Chain = SDValue(LROpOut.getNode(), 1);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
|
|
|
|
// slot as the FP is never overwritten.
|
2009-07-03 14:47:08 +08:00
|
|
|
if (isDarwinABI) {
|
2009-07-03 14:45:56 +08:00
|
|
|
FPOpOut = getFramePointerFrameIndex(DAG);
|
|
|
|
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
|
|
|
|
Chain = SDValue(FPOpOut.getNode(), 1);
|
|
|
|
}
|
2008-04-30 17:16:33 +08:00
|
|
|
}
|
|
|
|
return Chain;
|
|
|
|
}
|
|
|
|
|
2008-03-05 07:17:14 +08:00
|
|
|
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
|
2009-02-18 06:15:04 +08:00
|
|
|
/// by "Src" to address "Dst" of size "Size". Alignment information is
|
2008-03-05 07:17:14 +08:00
|
|
|
/// specified by the specific parameter attribute. The copy will be passed as
|
|
|
|
/// a byval function parameter.
|
|
|
|
/// Sometimes what we are copying is the end of a larger object, the part that
|
|
|
|
/// does not fit in registers.
|
2009-02-18 06:15:04 +08:00
|
|
|
static SDValue
|
2008-07-28 05:46:04 +08:00
|
|
|
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
|
2008-03-21 17:14:45 +08:00
|
|
|
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
|
2009-07-03 14:43:35 +08:00
|
|
|
DebugLoc dl) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
|
2009-02-04 09:17:06 +08:00
|
|
|
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
|
|
|
|
false, NULL, 0, NULL, 0);
|
2008-03-05 07:17:14 +08:00
|
|
|
}
|
2007-02-25 13:34:32 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
|
|
|
|
/// tail calls.
|
|
|
|
static void
|
2008-07-28 05:46:04 +08:00
|
|
|
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
|
|
|
|
SDValue Arg, SDValue PtrOff, int SPDiff,
|
2008-04-30 17:16:33 +08:00
|
|
|
unsigned ArgOffset, bool isPPC64, bool isTailCall,
|
2008-07-28 05:46:04 +08:00
|
|
|
bool isVector, SmallVector<SDValue, 8> &MemOpChains,
|
2009-02-05 04:06:27 +08:00
|
|
|
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
|
|
|
|
DebugLoc dl) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2008-04-30 17:16:33 +08:00
|
|
|
if (!isTailCall) {
|
|
|
|
if (isVector) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue StackPtr;
|
2008-04-30 17:16:33 +08:00
|
|
|
if (isPPC64)
|
2009-08-12 04:47:22 +08:00
|
|
|
StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
|
2008-04-30 17:16:33 +08:00
|
|
|
else
|
2009-08-12 04:47:22 +08:00
|
|
|
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
|
2009-02-05 04:06:27 +08:00
|
|
|
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
|
2008-04-30 17:16:33 +08:00
|
|
|
DAG.getConstant(ArgOffset, PtrVT));
|
|
|
|
}
|
2009-02-05 04:06:27 +08:00
|
|
|
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
|
2008-04-30 17:16:33 +08:00
|
|
|
// Calculate and remember argument location.
|
|
|
|
} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
|
|
|
|
TailCallArguments);
|
|
|
|
}
|
|
|
|
|
2009-07-03 14:47:08 +08:00
|
|
|
static
|
|
|
|
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
|
|
|
|
DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
|
|
|
|
SDValue LROp, SDValue FPOp, bool isDarwinABI,
|
|
|
|
SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
|
|
|
|
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
|
|
|
|
// might overwrite each other in case of tail call optimization.
|
|
|
|
SmallVector<SDValue, 8> MemOpChains2;
|
|
|
|
// Do not flag preceeding copytoreg stuff together with the following stuff.
|
|
|
|
InFlag = SDValue();
|
|
|
|
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
|
|
|
|
MemOpChains2, dl);
|
|
|
|
if (!MemOpChains2.empty())
|
2009-08-12 04:47:22 +08:00
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
2009-07-03 14:47:08 +08:00
|
|
|
&MemOpChains2[0], MemOpChains2.size());
|
|
|
|
|
|
|
|
// Store the return address to the appropriate stack slot.
|
|
|
|
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
|
|
|
|
isPPC64, isDarwinABI, dl);
|
|
|
|
|
|
|
|
// Emit callseq_end just before tailcall node.
|
|
|
|
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
|
|
|
|
DAG.getIntPtrConstant(0, true), InFlag);
|
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
|
|
|
|
SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
|
|
|
|
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
|
2009-08-11 06:56:29 +08:00
|
|
|
SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
|
2009-07-03 14:47:08 +08:00
|
|
|
bool isSVR4ABI) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2009-08-12 04:47:22 +08:00
|
|
|
NodeTys.push_back(MVT::Other); // Returns a chain
|
|
|
|
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
|
2009-07-03 14:47:08 +08:00
|
|
|
|
|
|
|
unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
|
|
|
|
|
|
|
|
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
|
|
|
|
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
|
|
|
|
// node so that legalize doesn't hack it.
|
|
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
|
|
|
|
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
|
|
|
|
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
|
|
|
|
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
|
|
|
|
else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
|
|
|
|
// If this is an absolute destination address, use the munged value.
|
|
|
|
Callee = SDValue(Dest, 0);
|
|
|
|
else {
|
|
|
|
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
|
|
|
|
// to do the call, we can't use PPCISD::CALL.
|
|
|
|
SDValue MTCTROps[] = {Chain, Callee, InFlag};
|
|
|
|
Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
|
|
|
|
2 + (InFlag.getNode() != 0));
|
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
|
|
|
|
NodeTys.clear();
|
2009-08-12 04:47:22 +08:00
|
|
|
NodeTys.push_back(MVT::Other);
|
|
|
|
NodeTys.push_back(MVT::Flag);
|
2009-07-03 14:47:08 +08:00
|
|
|
Ops.push_back(Chain);
|
|
|
|
CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
|
|
|
|
Callee.setNode(0);
|
|
|
|
// Add CTR register as callee so a bctr can be emitted later.
|
|
|
|
if (isTailCall)
|
|
|
|
Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT));
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this is a direct call, pass the chain and the callee.
|
|
|
|
if (Callee.getNode()) {
|
|
|
|
Ops.push_back(Chain);
|
|
|
|
Ops.push_back(Callee);
|
|
|
|
}
|
|
|
|
// If this is a tail call add stack pointer delta.
|
|
|
|
if (isTailCall)
|
2009-08-12 04:47:22 +08:00
|
|
|
Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
|
2009-07-03 14:47:08 +08:00
|
|
|
|
|
|
|
// Add argument registers to the end of the list so that they are known live
|
|
|
|
// into the call.
|
|
|
|
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
|
|
|
|
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
|
|
|
|
RegsToPass[i].second.getValueType()));
|
|
|
|
|
|
|
|
return CallOpc;
|
|
|
|
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
/// LowerCallResult - Copy the values returned by a call out of the physical
/// registers assigned by RetCC_PPC.  One copy is emitted per return location;
/// each copied value is appended to InVals and the copies are threaded
/// through both the chain and the flag.  Returns the final chain.
SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) {
  // Let the calling convention assign a location to every returned value.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
                    RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned LocNo = 0, NumLocs = RVLocs.size(); LocNo != NumLocs;
       ++LocNo) {
    CCValAssign &VA = RVLocs[LocNo];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // The copy yields the value (#0), a chain (#1) and a flag (#2).
    SDValue Copy = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                      VA.getValVT(), InFlag);
    InVals.push_back(Copy.getValue(0));
    Chain = Copy.getValue(1);
    InFlag = Copy.getValue(2);
  }

  return Chain;
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue
|
2009-09-02 16:44:58 +08:00
|
|
|
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
|
|
|
|
bool isTailCall, bool isVarArg,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SelectionDAG &DAG,
|
|
|
|
SmallVector<std::pair<unsigned, SDValue>, 8>
|
|
|
|
&RegsToPass,
|
|
|
|
SDValue InFlag, SDValue Chain,
|
|
|
|
SDValue &Callee,
|
|
|
|
int SPDiff, unsigned NumBytes,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) {
|
2009-08-11 06:56:29 +08:00
|
|
|
std::vector<EVT> NodeTys;
|
2009-07-03 14:47:08 +08:00
|
|
|
SmallVector<SDValue, 8> Ops;
|
|
|
|
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
|
|
|
|
isTailCall, RegsToPass, Ops, NodeTys,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
PPCSubTarget.isSVR4ABI());
|
2009-07-03 14:47:08 +08:00
|
|
|
|
|
|
|
// When performing tail call optimization the callee pops its arguments off
|
|
|
|
// the stack. Account for this here so these bytes can be pushed back on in
|
|
|
|
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
|
|
|
|
int BytesCalleePops =
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
(CallConv==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
|
2009-07-03 14:47:08 +08:00
|
|
|
|
|
|
|
if (InFlag.getNode())
|
|
|
|
Ops.push_back(InFlag);
|
|
|
|
|
|
|
|
// Emit tail call.
|
|
|
|
if (isTailCall) {
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
// If this is the first return lowered for this function, add the regs
|
|
|
|
// to the liveout set for the function.
|
|
|
|
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
|
|
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
|
|
|
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
|
|
|
|
*DAG.getContext());
|
|
|
|
CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
|
|
|
|
for (unsigned i = 0; i != RVLocs.size(); ++i)
|
|
|
|
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(((Callee.getOpcode() == ISD::Register &&
|
|
|
|
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
|
|
|
|
Callee.getOpcode() == ISD::TargetExternalSymbol ||
|
|
|
|
Callee.getOpcode() == ISD::TargetGlobalAddress ||
|
|
|
|
isa<ConstantSDNode>(Callee)) &&
|
|
|
|
"Expecting an global address, external symbol, absolute value or register");
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
|
2009-07-03 14:47:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
|
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
|
2009-08-15 19:54:46 +08:00
|
|
|
// Add a NOP immediately after the branch instruction when using the 64-bit
|
|
|
|
// SVR4 ABI. At link time, if caller and callee are in a different module and
|
|
|
|
// thus have a different TOC, the call will be replaced with a call to a stub
|
|
|
|
// function which saves the current TOC, loads the TOC of the callee and
|
|
|
|
// branches to the callee. The NOP will be replaced with a load instruction
|
|
|
|
// which restores the TOC of the caller from the TOC save slot of the current
|
|
|
|
// stack frame. If caller and callee belong to the same module (and have the
|
|
|
|
// same TOC), the NOP will remain unchanged.
|
|
|
|
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
|
|
|
|
// Insert NOP.
|
|
|
|
InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag);
|
|
|
|
}
|
|
|
|
|
2009-07-03 14:47:08 +08:00
|
|
|
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
|
|
|
|
DAG.getIntPtrConstant(BytesCalleePops, true),
|
|
|
|
InFlag);
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
if (!Ins.empty())
|
2009-07-03 14:47:08 +08:00
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
|
|
|
|
Ins, dl, DAG, InVals);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// LowerCall - Entry point for lowering an outgoing call.  This only selects
/// the ABI-specific implementation and forwards every argument unchanged:
/// LowerCall_SVR4 is used when the subtarget reports the SVR4 ABI and is not
/// PPC64; every other configuration goes through LowerCall_Darwin.
SDValue
PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {
  const bool UseSVR4Lowering =
    PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64();

  if (!UseSVR4Lowering)
    return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                            isTailCall, Outs, Ins,
                            dl, DAG, InVals);

  return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
                        isTailCall, Outs, Ins,
                        dl, DAG, InVals);
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue
|
|
|
|
PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
bool isTailCall,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
DebugLoc dl, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) {
|
|
|
|
// See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
|
2009-08-15 19:54:46 +08:00
|
|
|
// of the 32-bit SVR4 ABI stack frame layout.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
|
|
|
|
assert((!isTailCall ||
|
|
|
|
(CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
|
|
|
|
"IsEligibleForTailCallOptimization missed a case!");
|
|
|
|
|
|
|
|
assert((CallConv == CallingConv::C ||
|
|
|
|
CallConv == CallingConv::Fast) && "Unknown calling convention!");
|
2009-07-03 14:45:56 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2009-07-03 14:45:56 +08:00
|
|
|
unsigned PtrByteSize = 4;
|
|
|
|
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
|
|
|
|
// Mark this function as potentially containing a function that contains a
|
|
|
|
// tail call. As a consequence the frame pointer will be used for dynamicalloc
|
|
|
|
// and restoring the callers stack pointer in this functions epilog. This is
|
|
|
|
// done because by tail calling the called function might overwrite the value
|
|
|
|
// in this function's (MF) stack pointer stack slot 0(SP).
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
if (PerformTailCallOpt && CallConv==CallingConv::Fast)
|
2009-07-03 14:45:56 +08:00
|
|
|
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
|
|
|
|
|
|
|
|
// Count how many bytes are to be pushed on the stack, including the linkage
|
|
|
|
// area, parameter list area and the part of the local variable space which
|
|
|
|
// contains copies of aggregates which are passed by value.
|
|
|
|
|
|
|
|
// Assign locations to all of the outgoing arguments.
|
|
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
|
|
|
|
ArgLocs, *DAG.getContext());
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// Reserve space for the linkage area on the stack.
|
|
|
|
CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
|
|
|
|
|
|
|
|
if (isVarArg) {
|
|
|
|
// Handle fixed and variable vector arguments differently.
|
|
|
|
// Fixed vector arguments go into registers as long as registers are
|
|
|
|
// available. Variable vector arguments always go into memory.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
unsigned NumArgs = Outs.size();
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
for (unsigned i = 0; i != NumArgs; ++i) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ArgVT = Outs[i].Val.getValueType();
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
|
2009-07-03 14:45:56 +08:00
|
|
|
bool Result;
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
if (Outs[i].IsFixed) {
|
2009-07-03 14:45:56 +08:00
|
|
|
Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
|
|
|
|
CCInfo);
|
|
|
|
} else {
|
|
|
|
Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
|
|
|
|
ArgFlags, CCInfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Result) {
|
2009-07-09 04:53:28 +08:00
|
|
|
#ifndef NDEBUG
|
2009-08-23 14:03:38 +08:00
|
|
|
errs() << "Call operand #" << i << " has unhandled type "
|
2009-08-11 06:56:29 +08:00
|
|
|
<< ArgVT.getEVTString() << "\n";
|
2009-07-09 04:53:28 +08:00
|
|
|
#endif
|
2009-07-15 00:55:14 +08:00
|
|
|
llvm_unreachable(0);
|
2009-07-03 14:45:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// All arguments are treated the same.
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
|
2009-07-03 14:45:56 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Assign locations to all of the outgoing aggregate by value arguments.
|
|
|
|
SmallVector<CCValAssign, 16> ByValArgLocs;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs,
|
2009-07-22 08:24:57 +08:00
|
|
|
*DAG.getContext());
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// Reserve stack space for the allocations in CCInfo.
|
|
|
|
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
// Size of the linkage area, parameter list area and the part of the local
|
|
|
|
// space variable where copies of aggregates which are passed by value are
|
|
|
|
// stored.
|
|
|
|
unsigned NumBytes = CCByValInfo.getNextStackOffset();
|
|
|
|
|
|
|
|
// Calculate by how many bytes the stack has to be adjusted in case of tail
|
|
|
|
// call optimization.
|
|
|
|
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
|
|
|
|
|
|
|
|
// Adjust the stack pointer for the new arguments...
|
|
|
|
// These operations are automatically eliminated by the prolog/epilog pass
|
|
|
|
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
|
|
|
|
SDValue CallSeqStart = Chain;
|
|
|
|
|
|
|
|
// Load the return address and frame pointer so it can be moved somewhere else
|
|
|
|
// later.
|
|
|
|
SDValue LROp, FPOp;
|
|
|
|
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
|
|
|
|
dl);
|
|
|
|
|
|
|
|
// Set up a copy of the stack pointer for use loading and storing any
|
|
|
|
// arguments that may not fit in the registers available for argument
|
|
|
|
// passing.
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
|
|
|
|
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
|
|
|
|
SmallVector<SDValue, 8> MemOpChains;
|
|
|
|
|
|
|
|
// Walk the register/memloc assignments, inserting copies/loads.
|
|
|
|
for (unsigned i = 0, j = 0, e = ArgLocs.size();
|
|
|
|
i != e;
|
|
|
|
++i) {
|
|
|
|
CCValAssign &VA = ArgLocs[i];
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Arg = Outs[i].Val;
|
|
|
|
ISD::ArgFlagsTy Flags = Outs[i].Flags;
|
2009-07-03 14:45:56 +08:00
|
|
|
|
|
|
|
if (Flags.isByVal()) {
|
|
|
|
// Argument is an aggregate which is passed by value, thus we need to
|
|
|
|
// create a copy of it in the local variable space of the current stack
|
|
|
|
// frame (which is the stack frame of the caller) and pass the address of
|
|
|
|
// this copy to the callee.
|
|
|
|
assert((j < ByValArgLocs.size()) && "Index out of bounds!");
|
|
|
|
CCValAssign &ByValVA = ByValArgLocs[j++];
|
|
|
|
assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
|
|
|
|
|
|
|
|
// Memory reserved in the local variable space of the callers stack frame.
|
|
|
|
unsigned LocMemOffset = ByValVA.getLocMemOffset();
|
|
|
|
|
|
|
|
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
|
|
|
|
|
|
|
|
// Create a copy of the argument in the local area of the current
|
|
|
|
// stack frame.
|
|
|
|
SDValue MemcpyCall =
|
|
|
|
CreateCopyOfByValArgument(Arg, PtrOff,
|
|
|
|
CallSeqStart.getNode()->getOperand(0),
|
|
|
|
Flags, DAG, dl);
|
|
|
|
|
|
|
|
// This must go outside the CALLSEQ_START..END.
|
|
|
|
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
|
|
|
|
CallSeqStart.getNode()->getOperand(1));
|
|
|
|
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
|
|
|
|
NewCallSeqStart.getNode());
|
|
|
|
Chain = CallSeqStart = NewCallSeqStart;
|
|
|
|
|
|
|
|
// Pass the address of the aggregate copy on the stack either in a
|
|
|
|
// physical register or in the parameter list area of the current stack
|
|
|
|
// frame to the callee.
|
|
|
|
Arg = PtrOff;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (VA.isRegLoc()) {
|
|
|
|
// Put argument in a physical register.
|
|
|
|
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
|
|
|
} else {
|
|
|
|
// Put argument in the parameter list area of the current stack frame.
|
|
|
|
assert(VA.isMemLoc());
|
|
|
|
unsigned LocMemOffset = VA.getLocMemOffset();
|
|
|
|
|
|
|
|
if (!isTailCall) {
|
|
|
|
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
|
|
|
|
|
|
|
|
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
|
|
|
|
PseudoSourceValue::getStack(), LocMemOffset));
|
|
|
|
} else {
|
|
|
|
// Calculate and remember argument location.
|
|
|
|
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
|
|
|
|
TailCallArguments);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!MemOpChains.empty())
|
2009-08-12 04:47:22 +08:00
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
2009-07-03 14:45:56 +08:00
|
|
|
&MemOpChains[0], MemOpChains.size());
|
|
|
|
|
|
|
|
// Build a sequence of copy-to-reg nodes chained together with token chain
|
|
|
|
// and flag operands which copy the outgoing args into the appropriate regs.
|
|
|
|
SDValue InFlag;
|
|
|
|
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
|
|
|
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
|
|
|
|
RegsToPass[i].second, InFlag);
|
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set CR6 to true if this is a vararg call.
|
|
|
|
if (isVarArg) {
|
2009-09-26 02:54:59 +08:00
|
|
|
SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
|
2009-07-03 14:45:56 +08:00
|
|
|
Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
|
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isTailCall) {
|
2009-07-03 14:47:08 +08:00
|
|
|
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
|
|
|
|
false, TailCallArguments);
|
2009-07-03 14:45:56 +08:00
|
|
|
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
|
|
|
|
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
|
|
|
|
Ins, InVals);
|
2009-07-03 14:45:56 +08:00
|
|
|
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue
|
|
|
|
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
bool isTailCall,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
DebugLoc dl, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) {
|
|
|
|
|
|
|
|
unsigned NumOps = Outs.size();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2009-08-12 04:47:22 +08:00
|
|
|
bool isPPC64 = PtrVT == MVT::i64;
|
2006-06-27 06:48:35 +08:00
|
|
|
unsigned PtrByteSize = isPPC64 ? 8 : 4;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
|
|
|
|
// Mark this function as potentially containing a function that contains a
|
|
|
|
// tail call. As a consequence the frame pointer will be used for dynamicalloc
|
|
|
|
// and restoring the callers stack pointer in this functions epilog. This is
|
|
|
|
// done because by tail calling the called function might overwrite the value
|
|
|
|
// in this function's (MF) stack pointer stack slot 0(SP).
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
if (PerformTailCallOpt && CallConv==CallingConv::Fast)
|
2008-04-30 17:16:33 +08:00
|
|
|
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
|
|
|
|
|
|
|
|
unsigned nAltivecParamsAtEnd = 0;
|
|
|
|
|
2006-05-17 06:56:08 +08:00
|
|
|
// Count how many bytes are to be pushed on the stack, including the linkage
|
2006-06-27 06:48:35 +08:00
|
|
|
// area, and parameter passing area. We start with 24/48 bytes, which is
|
2006-05-17 08:15:40 +08:00
|
|
|
// prereserved space for [SP][CR][LR][3 x unused].
|
2008-04-30 17:16:33 +08:00
|
|
|
unsigned NumBytes =
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
|
|
|
|
Outs,
|
2009-07-03 14:47:08 +08:00
|
|
|
nAltivecParamsAtEnd);
|
2006-05-17 07:54:25 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
// Calculate by how many bytes the stack has to be adjusted in case of tail
|
|
|
|
// call optimization.
|
|
|
|
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
// To protect arguments on the stack from being clobbered in a tail call,
|
|
|
|
// force all the loads to happen before doing any other lowering.
|
|
|
|
if (isTailCall)
|
|
|
|
Chain = DAG.getStackArgumentTokenFactor(Chain);
|
|
|
|
|
2006-05-17 08:15:40 +08:00
|
|
|
// Adjust the stack pointer for the new arguments...
|
|
|
|
// These operations are automatically eliminated by the prolog/epilog pass
|
2008-10-12 06:08:30 +08:00
|
|
|
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue CallSeqStart = Chain;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
// Load the return address and frame pointer so they can be moved somewhere else
|
|
|
|
// later.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue LROp, FPOp;
|
2009-07-03 14:45:56 +08:00
|
|
|
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
|
|
|
|
dl);
|
2008-04-30 17:16:33 +08:00
|
|
|
|
2006-05-17 08:15:40 +08:00
|
|
|
// Set up a copy of the stack pointer for use loading and storing any
|
|
|
|
// arguments that may not fit in the registers available for argument
|
|
|
|
// passing.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue StackPtr;
|
2006-06-27 06:48:35 +08:00
|
|
|
if (isPPC64)
|
2009-08-12 04:47:22 +08:00
|
|
|
StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
|
2006-06-27 06:48:35 +08:00
|
|
|
else
|
2009-08-12 04:47:22 +08:00
|
|
|
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-05-17 08:15:40 +08:00
|
|
|
// Figure out which arguments are going to go in registers, and which in
|
|
|
|
// memory. Also, if this is a vararg function, floating point operations
|
|
|
|
// must be stored to our stack, and loaded into integer regs as well, if
|
|
|
|
// any integer regs are available for argument passing.
|
2009-07-03 14:47:08 +08:00
|
|
|
unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
|
2006-05-17 14:01:33 +08:00
|
|
|
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-06-27 06:48:35 +08:00
|
|
|
static const unsigned GPR_32[] = { // 32-bit registers.
|
2006-05-17 14:01:33 +08:00
|
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
|
|
};
|
2006-06-27 06:48:35 +08:00
|
|
|
static const unsigned GPR_64[] = { // 64-bit registers.
|
|
|
|
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
|
|
|
|
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
|
|
|
|
};
|
2009-08-15 19:54:46 +08:00
|
|
|
static const unsigned *FPR = GetFPR();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-05-17 14:01:33 +08:00
|
|
|
static const unsigned VR[] = {
|
|
|
|
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
|
|
|
|
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
|
|
|
|
};
|
2007-09-07 12:06:50 +08:00
|
|
|
const unsigned NumGPRs = array_lengthof(GPR_32);
|
2009-07-03 14:47:08 +08:00
|
|
|
const unsigned NumFPRs = 13;
|
2009-07-03 14:43:35 +08:00
|
|
|
const unsigned NumVRs = array_lengthof(VR);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-06-27 06:48:35 +08:00
|
|
|
const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
|
|
|
|
|
2009-07-03 14:47:08 +08:00
|
|
|
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
|
2008-04-30 17:16:33 +08:00
|
|
|
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SmallVector<SDValue, 8> MemOpChains;
|
2006-05-25 08:57:32 +08:00
|
|
|
for (unsigned i = 0; i != NumOps; ++i) {
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Arg = Outs[i].Val;
|
|
|
|
ISD::ArgFlagsTy Flags = Outs[i].Flags;
|
2007-03-13 23:02:46 +08:00
|
|
|
|
2006-05-17 08:15:40 +08:00
|
|
|
// PtrOff will be used to store the current argument to the stack if a
|
|
|
|
// register cannot be found for it.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PtrOff;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-07-03 14:47:08 +08:00
|
|
|
PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
|
2007-03-13 23:02:46 +08:00
|
|
|
|
2009-02-04 10:34:38 +08:00
|
|
|
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
|
2006-06-27 06:48:35 +08:00
|
|
|
|
|
|
|
// On PPC64, promote integers to 64-bit values.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (isPPC64 && Arg.getValueType() == MVT::i32) {
|
2008-03-21 17:14:45 +08:00
|
|
|
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
|
|
|
|
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
2009-08-12 04:47:22 +08:00
|
|
|
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
|
2006-06-27 06:48:35 +08:00
|
|
|
}
|
2008-03-05 07:17:14 +08:00
|
|
|
|
2008-03-08 04:27:40 +08:00
|
|
|
// FIXME memcpy is used way more than necessary. Correctness first.
|
2008-03-21 17:14:45 +08:00
|
|
|
if (Flags.isByVal()) {
|
|
|
|
unsigned Size = Flags.getByValSize();
|
2008-03-08 04:27:40 +08:00
|
|
|
if (Size==1 || Size==2) {
|
|
|
|
// Very small objects are passed right-justified.
|
|
|
|
// Everything else is passed left-justified.
|
2009-08-12 04:47:22 +08:00
|
|
|
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
|
2008-03-08 04:27:40 +08:00
|
|
|
if (GPR_idx != NumGPRs) {
|
2009-02-18 06:15:04 +08:00
|
|
|
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
|
2008-03-08 04:27:40 +08:00
|
|
|
NULL, 0, VT);
|
|
|
|
MemOpChains.push_back(Load.getValue(1));
|
|
|
|
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
|
2009-07-03 14:47:08 +08:00
|
|
|
|
|
|
|
ArgOffset += PtrByteSize;
|
2008-03-08 04:27:40 +08:00
|
|
|
} else {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
|
2009-02-18 06:15:04 +08:00
|
|
|
CallSeqStart.getNode()->getOperand(0),
|
2009-07-03 14:43:35 +08:00
|
|
|
Flags, DAG, dl);
|
2008-03-08 04:27:40 +08:00
|
|
|
// This must go outside the CALLSEQ_START..END.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
|
2008-08-29 05:40:38 +08:00
|
|
|
CallSeqStart.getNode()->getOperand(1));
|
2008-08-31 23:37:04 +08:00
|
|
|
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
|
|
|
|
NewCallSeqStart.getNode());
|
2008-03-08 04:27:40 +08:00
|
|
|
Chain = CallSeqStart = NewCallSeqStart;
|
|
|
|
ArgOffset += PtrByteSize;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
2008-03-17 10:13:43 +08:00
|
|
|
// Copy entire object into memory. There are cases where gcc-generated
|
|
|
|
// code assumes it is there, even if it could be put entirely into
|
|
|
|
// registers. (This is not what the doc says.)
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
|
2009-02-18 06:15:04 +08:00
|
|
|
CallSeqStart.getNode()->getOperand(0),
|
2009-07-03 14:43:35 +08:00
|
|
|
Flags, DAG, dl);
|
2008-03-17 10:13:43 +08:00
|
|
|
// This must go outside the CALLSEQ_START..END.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
|
2008-08-29 05:40:38 +08:00
|
|
|
CallSeqStart.getNode()->getOperand(1));
|
|
|
|
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
|
2008-03-17 10:13:43 +08:00
|
|
|
Chain = CallSeqStart = NewCallSeqStart;
|
|
|
|
// And copy the pieces of it that fit into registers.
|
2008-03-05 07:17:14 +08:00
|
|
|
for (unsigned j=0; j<Size; j+=PtrByteSize) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
|
2008-03-05 07:17:14 +08:00
|
|
|
if (GPR_idx != NumGPRs) {
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
|
2008-03-06 07:31:27 +08:00
|
|
|
MemOpChains.push_back(Load.getValue(1));
|
2008-03-05 07:17:14 +08:00
|
|
|
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
|
2009-07-03 14:47:08 +08:00
|
|
|
ArgOffset += PtrByteSize;
|
2008-03-05 07:17:14 +08:00
|
|
|
} else {
|
2008-03-17 10:13:43 +08:00
|
|
|
ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
|
2008-03-08 04:27:40 +08:00
|
|
|
break;
|
2008-03-05 07:17:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
switch (Arg.getValueType().getSimpleVT().SimpleTy) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unexpected ValueType for argument!");
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i32:
|
|
|
|
case MVT::i64:
|
2006-05-17 14:01:33 +08:00
|
|
|
if (GPR_idx != NumGPRs) {
|
|
|
|
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
|
2006-05-17 08:15:40 +08:00
|
|
|
} else {
|
2008-04-30 17:16:33 +08:00
|
|
|
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
|
|
|
isPPC64, isTailCall, false, MemOpChains,
|
2009-02-05 04:06:27 +08:00
|
|
|
TailCallArguments, dl);
|
2006-05-17 08:15:40 +08:00
|
|
|
}
|
2009-07-03 14:47:08 +08:00
|
|
|
ArgOffset += PtrByteSize;
|
2006-05-17 08:15:40 +08:00
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
2006-05-17 14:01:33 +08:00
|
|
|
if (FPR_idx != NumFPRs) {
|
|
|
|
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
|
|
|
|
|
2006-05-17 08:15:40 +08:00
|
|
|
if (isVarArg) {
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
|
2006-05-17 14:01:33 +08:00
|
|
|
MemOpChains.push_back(Store);
|
|
|
|
|
2006-05-17 08:15:40 +08:00
|
|
|
// Float varargs are always shadowed in available integer registers
|
2006-05-17 14:01:33 +08:00
|
|
|
if (GPR_idx != NumGPRs) {
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
|
2006-05-17 14:01:33 +08:00
|
|
|
MemOpChains.push_back(Load.getValue(1));
|
2009-07-03 14:47:08 +08:00
|
|
|
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
|
2006-05-17 08:15:40 +08:00
|
|
|
}
|
2009-08-12 04:47:22 +08:00
|
|
|
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
|
2009-02-04 10:34:38 +08:00
|
|
|
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
|
|
|
|
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
|
2006-05-17 14:01:33 +08:00
|
|
|
MemOpChains.push_back(Load.getValue(1));
|
2009-07-03 14:47:08 +08:00
|
|
|
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
|
2006-05-17 06:56:08 +08:00
|
|
|
}
|
|
|
|
} else {
|
2006-05-17 08:15:40 +08:00
|
|
|
// If we have any FPRs remaining, we may also have GPRs remaining.
|
|
|
|
// Args passed in FPRs consume either 1 (f32) or 2 (f64) available
|
|
|
|
// GPRs.
|
2009-07-03 14:47:08 +08:00
|
|
|
if (GPR_idx != NumGPRs)
|
|
|
|
++GPR_idx;
|
2009-08-12 04:47:22 +08:00
|
|
|
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
|
2009-07-03 14:47:08 +08:00
|
|
|
!isPPC64) // PPC64 has 64-bit GPR's obviously :)
|
|
|
|
++GPR_idx;
|
2006-05-17 06:56:08 +08:00
|
|
|
}
|
2006-05-17 08:15:40 +08:00
|
|
|
} else {
|
2008-04-30 17:16:33 +08:00
|
|
|
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
|
|
|
isPPC64, isTailCall, false, MemOpChains,
|
2009-02-05 04:06:27 +08:00
|
|
|
TailCallArguments, dl);
|
2007-02-25 13:34:32 +08:00
|
|
|
}
|
2009-07-03 14:47:08 +08:00
|
|
|
if (isPPC64)
|
|
|
|
ArgOffset += 8;
|
|
|
|
else
|
2009-08-12 04:47:22 +08:00
|
|
|
ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
|
2006-05-17 08:15:40 +08:00
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::v4f32:
|
|
|
|
case MVT::v4i32:
|
|
|
|
case MVT::v8i16:
|
|
|
|
case MVT::v16i8:
|
2008-03-12 08:22:17 +08:00
|
|
|
if (isVarArg) {
|
|
|
|
// These go aligned on the stack, or in the corresponding R registers
|
2009-02-18 06:15:04 +08:00
|
|
|
// when within range. The Darwin PPC ABI doc claims they also go in
|
2008-03-12 08:22:17 +08:00
|
|
|
// V registers; in fact gcc does this only for arguments that are
|
|
|
|
// prototyped, not for those that match the ... We do it for all
|
|
|
|
// arguments, seems to work.
|
|
|
|
while (ArgOffset % 16 !=0) {
|
|
|
|
ArgOffset += PtrByteSize;
|
|
|
|
if (GPR_idx != NumGPRs)
|
|
|
|
GPR_idx++;
|
|
|
|
}
|
|
|
|
// We could elide this store in the case where the object fits
|
|
|
|
// entirely in R registers. Maybe later.
|
2009-02-18 06:15:04 +08:00
|
|
|
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
|
2008-03-12 08:22:17 +08:00
|
|
|
DAG.getConstant(ArgOffset, PtrVT));
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
|
2008-03-12 08:22:17 +08:00
|
|
|
MemOpChains.push_back(Store);
|
|
|
|
if (VR_idx != NumVRs) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
|
2008-03-12 08:22:17 +08:00
|
|
|
MemOpChains.push_back(Load.getValue(1));
|
|
|
|
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
|
|
|
|
}
|
|
|
|
ArgOffset += 16;
|
|
|
|
for (unsigned i=0; i<16; i+=PtrByteSize) {
|
|
|
|
if (GPR_idx == NumGPRs)
|
|
|
|
break;
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
|
2008-03-12 08:22:17 +08:00
|
|
|
DAG.getConstant(i, PtrVT));
|
2009-02-04 10:34:38 +08:00
|
|
|
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
|
2008-03-12 08:22:17 +08:00
|
|
|
MemOpChains.push_back(Load.getValue(1));
|
|
|
|
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2008-04-30 17:16:33 +08:00
|
|
|
|
2008-03-15 01:41:26 +08:00
|
|
|
// Non-varargs Altivec params generally go in registers, but have
|
|
|
|
// stack space allocated at the end.
|
|
|
|
if (VR_idx != NumVRs) {
|
|
|
|
// Doesn't have GPR space allocated.
|
|
|
|
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
|
|
|
|
} else if (nAltivecParamsAtEnd==0) {
|
|
|
|
// We are emitting Altivec params in order.
|
2008-04-30 17:16:33 +08:00
|
|
|
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
|
|
|
isPPC64, isTailCall, true, MemOpChains,
|
2009-02-05 04:06:27 +08:00
|
|
|
TailCallArguments, dl);
|
2008-03-12 08:22:17 +08:00
|
|
|
ArgOffset += 16;
|
|
|
|
}
|
2006-05-17 08:15:40 +08:00
|
|
|
break;
|
2006-05-17 06:56:08 +08:00
|
|
|
}
|
|
|
|
}
|
2008-03-15 01:41:26 +08:00
|
|
|
// If all Altivec parameters fit in registers, as they usually do,
|
|
|
|
// they get stack space following the non-Altivec parameters. We
|
|
|
|
// don't track this here because nobody below needs it.
|
|
|
|
// If there are more Altivec parameters than fit in registers emit
|
|
|
|
// the stores here.
|
|
|
|
if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
|
|
|
|
unsigned j = 0;
|
|
|
|
// Offset is aligned; skip 1st 12 params which go in V registers.
|
|
|
|
ArgOffset = ((ArgOffset+15)/16)*16;
|
|
|
|
ArgOffset += 12*16;
|
|
|
|
for (unsigned i = 0; i != NumOps; ++i) {
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
SDValue Arg = Outs[i].Val;
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ArgType = Arg.getValueType();
|
2009-08-12 04:47:22 +08:00
|
|
|
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
|
|
|
|
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
|
2008-03-15 01:41:26 +08:00
|
|
|
if (++j > NumVRs) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PtrOff;
|
2008-04-30 17:16:33 +08:00
|
|
|
// We are emitting Altivec params in order.
|
|
|
|
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
|
|
|
isPPC64, isTailCall, true, MemOpChains,
|
2009-02-05 04:06:27 +08:00
|
|
|
TailCallArguments, dl);
|
2008-03-15 01:41:26 +08:00
|
|
|
ArgOffset += 16;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-05-17 14:01:33 +08:00
|
|
|
if (!MemOpChains.empty())
|
2009-08-12 04:47:22 +08:00
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
2006-08-12 01:38:39 +08:00
|
|
|
&MemOpChains[0], MemOpChains.size());
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-05-17 14:01:33 +08:00
|
|
|
// Build a sequence of copy-to-reg nodes chained together with token chain
|
|
|
|
// and flag operands which copy the outgoing args into the appropriate regs.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue InFlag;
|
2006-05-17 14:01:33 +08:00
|
|
|
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
2009-02-18 06:15:04 +08:00
|
|
|
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
|
2009-02-04 10:34:38 +08:00
|
|
|
RegsToPass[i].second, InFlag);
|
2006-05-17 14:01:33 +08:00
|
|
|
InFlag = Chain.getValue(1);
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
if (isTailCall) {
|
2009-07-03 14:47:08 +08:00
|
|
|
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
|
|
|
|
FPOp, true, TailCallArguments);
|
2006-05-17 14:01:33 +08:00
|
|
|
}
|
2008-03-20 05:39:28 +08:00
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
|
|
|
|
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
|
|
|
|
Ins, InVals);
|
2006-05-17 06:56:08 +08:00
|
|
|
}
|
|
|
|
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
/// LowerReturn - Lower an outgoing return.  RetCC_PPC is run over Outs to
/// assign each returned value a location (asserted to be a register).  While
/// the function's live-out set is still empty, those registers are added to
/// it.  Each value in Outs is then copied into its assigned register, with
/// the copies linked through Chain and Flag, and a PPCISD::RET_FLAG node is
/// returned.
///
/// \param Chain     incoming token chain.
/// \param CallConv  calling convention handed to CCState.
/// \param isVarArg  vararg-ness handed to CCState.
/// \param Outs      values to return; Outs[i].Val is copied to RVLocs[i].
/// \param dl        debug location attached to the created nodes.
/// \param DAG       DAG in which the nodes are created.
SDValue
|
|
|
|
PPCTargetLowering::LowerReturn(SDValue Chain,
|
2009-09-02 16:44:58 +08:00
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
DebugLoc dl, SelectionDAG &DAG) {
|
|
|
|
|
2007-03-06 08:59:59 +08:00
|
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
|
|
|
|
RVLocs, *DAG.getContext());
|
|
|
|
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-03-06 08:59:59 +08:00
|
|
|
// If this is the first return lowered for this function, add the regs to the
|
|
|
|
// liveout set for the function.
|
2007-12-31 12:13:23 +08:00
|
|
|
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
|
2007-03-06 08:59:59 +08:00
|
|
|
for (unsigned i = 0; i != RVLocs.size(); ++i)
|
2007-12-31 12:13:23 +08:00
|
|
|
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
2007-03-06 08:59:59 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Flag;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-03-06 08:59:59 +08:00
|
|
|
// Copy the result values into the output registers.
|
|
|
|
for (unsigned i = 0; i != RVLocs.size(); ++i) {
|
|
|
|
CCValAssign &VA = RVLocs[i];
|
|
|
|
assert(VA.isRegLoc() && "Can only return in registers!");
|
2009-02-18 06:15:04 +08:00
|
|
|
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
|
Major calling convention code refactoring.
Instead of awkwardly encoding calling-convention information with ISD::CALL,
ISD::FORMAL_ARGUMENTS, ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering
provides three virtual functions for targets to override:
LowerFormalArguments, LowerCall, and LowerRet, which replace the custom
lowering done on the special nodes. They provide the same information, but
in a more immediately usable format.
This also reworks much of the target-independent tail call logic. The
decision of whether or not to perform a tail call is now cleanly split
between target-independent portions, and the target dependent portion
in IsEligibleForTailCallOptimization.
This also synchronizes all in-tree targets, to help enable future
refactoring and feature work.
llvm-svn: 78142
2009-08-05 09:29:28 +08:00
|
|
|
Outs[i].Val, Flag);
|
2007-03-06 08:59:59 +08:00
|
|
|
Flag = Chain.getValue(1);
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
2007-03-06 08:59:59 +08:00
|
|
|
|
2008-08-29 05:40:38 +08:00
|
|
|
|
|
// Flag is only assigned inside the copy loop above, so it has no node when
// nothing was copied; in that case RET_FLAG is built from the chain alone.
if (Flag.getNode())
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
|
2007-03-06 08:59:59 +08:00
|
|
|
else
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerSTACKRESTORE - Lower a STACKRESTORE node.  Operand 0 of Op is the
/// chain and operand 1 is the saved stack pointer value.  The pointer-sized
/// word addressed by the stack pointer register (X1 on PPC64, R1 otherwise)
/// is loaded, the register is then set to the saved value, and the loaded
/// word is stored back through the stack pointer register.  The returned
/// value is that final store.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
|
2006-12-05 06:04:42 +08:00
|
|
|
const PPCSubtarget &Subtarget) {
|
|
|
|
// When we pop the dynamic allocation we need to restore the SP link.
|
2009-02-08 03:59:05 +08:00
|
|
|
DebugLoc dl = Op.getDebugLoc();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-12-05 06:04:42 +08:00
|
|
|
// Get the correct type for pointers.
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2006-12-05 06:04:42 +08:00
|
|
|
|
|
|
|
|
|
// Construct the stack pointer operand.
|
|
|
|
bool IsPPC64 = Subtarget.isPPC64();
|
|
|
|
unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue StackPtr = DAG.getRegister(SP, PtrVT);
|
2006-12-05 06:04:42 +08:00
|
|
|
|
|
|
|
|
|
// Get the operands for the STACKRESTORE.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Chain = Op.getOperand(0);
|
|
|
|
SDValue SaveSP = Op.getOperand(1);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-12-05 06:04:42 +08:00
|
|
|
// Load the old link SP.
|
2009-02-05 04:06:27 +08:00
|
|
|
SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-12-05 06:04:42 +08:00
|
|
|
// Restore the stack pointer.  The copy is chained on the load's output
// chain so the load is ordered before the register is overwritten.
|
2009-02-05 04:06:27 +08:00
|
|
|
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-12-05 06:04:42 +08:00
|
|
|
// Store the old link SP.
|
2009-02-05 04:06:27 +08:00
|
|
|
return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
|
2006-12-05 06:04:42 +08:00
|
|
|
}
|
|
|
|
|
2008-04-30 17:16:33 +08:00
|
|
|
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// getReturnAddrFrameIndex - Return a pointer-typed FrameIndex node for the
/// return address save slot.  The index is read from PPCFunctionInfo; when it
/// is 0 a fixed object (8 bytes on PPC64, otherwise 4) is created at
/// PPCFrameInfo::getReturnSaveOffset() and the new index is stored back into
/// PPCFunctionInfo before being used.
SDValue
|
2008-04-30 17:16:33 +08:00
|
|
|
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
|
2006-11-17 06:43:37 +08:00
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
2008-04-30 17:16:33 +08:00
|
|
|
bool IsPPC64 = PPCSubTarget.isPPC64();
|
2009-07-03 14:47:08 +08:00
|
|
|
bool isDarwinABI = PPCSubTarget.isDarwinABI();
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2008-04-30 17:16:33 +08:00
|
|
|
|
|
|
|
|
|
// Get the current return address save index.
|
|
|
|
// NOTE(review): this comment originally named DYNALLOC as the main user of
// the index; that wording matches the frame pointer variant - confirm.
|
|
|
|
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
|
|
|
int RASI = FI->getReturnAddrSaveIndex();
|
|
|
|
|
|
|
|
|
// If the return address save index hasn't been defined yet (0 means unset).
|
|
|
|
if (!RASI) {
|
|
|
|
// Find out the fixed offset of the return address save area.
|
2009-07-03 14:47:08 +08:00
|
|
|
int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
|
2008-04-30 17:16:33 +08:00
|
|
|
// Allocate the frame index for the return address save area.
|
|
|
|
RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
|
|
|
|
// Save the result.
|
|
|
|
FI->setReturnAddrSaveIndex(RASI);
|
|
|
|
}
|
|
|
|
return DAG.getFrameIndex(RASI, PtrVT);
|
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// getFramePointerFrameIndex - Return a pointer-typed FrameIndex node for the
/// frame pointer save slot.  The index is read from PPCFunctionInfo; when it
/// is 0 a fixed object (8 bytes on PPC64, otherwise 4) is created at
/// PPCFrameInfo::getFramePointerSaveOffset() and the new index is stored back
/// into PPCFunctionInfo before being used.
SDValue
|
2008-04-30 17:16:33 +08:00
|
|
|
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
bool IsPPC64 = PPCSubTarget.isPPC64();
|
2009-07-03 14:47:08 +08:00
|
|
|
bool isDarwinABI = PPCSubTarget.isDarwinABI();
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2006-11-17 06:43:37 +08:00
|
|
|
|
|
|
|
|
|
// Get current frame pointer save index. The users of this index will be
|
|
|
|
// primarily DYNALLOC instructions.
|
|
|
|
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
|
|
|
int FPSI = FI->getFramePointerSaveIndex();
|
2008-04-30 17:16:33 +08:00
|
|
|
|
2006-11-17 06:43:37 +08:00
|
|
|
// If the frame pointer save index hasn't been defined yet (0 means unset).
|
|
|
|
if (!FPSI) {
|
|
|
|
// Find out the fixed offset of the frame pointer save area.
|
2009-07-03 14:47:08 +08:00
|
|
|
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
|
|
|
|
isDarwinABI);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-17 06:43:37 +08:00
|
|
|
// Allocate the frame index for the frame pointer save area.
|
2009-02-18 06:15:04 +08:00
|
|
|
FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
|
2006-11-17 06:43:37 +08:00
|
|
|
// Save the result.
|
2009-02-18 06:15:04 +08:00
|
|
|
FI->setFramePointerSaveIndex(FPSI);
|
2006-11-17 06:43:37 +08:00
|
|
|
}
|
2008-04-30 17:16:33 +08:00
|
|
|
return DAG.getFrameIndex(FPSI, PtrVT);
|
|
|
|
}
|
2006-11-17 06:43:37 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerDYNAMIC_STACKALLOC - Lower a dynamic stack allocation.  Operand 0 of
/// Op is the chain and operand 1 is the requested size.  The result is a
/// PPCISD::DYNALLOC node whose operands are the chain, the negated size
/// (0 - Size) and the frame index of the frame pointer save slot, and whose
/// value types are the pointer type and MVT::Other.  The Subtarget parameter
/// is not referenced in this body.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
2008-04-30 17:16:33 +08:00
|
|
|
SelectionDAG &DAG,
|
|
|
|
const PPCSubtarget &Subtarget) {
|
2006-11-17 06:43:37 +08:00
|
|
|
// Get the inputs.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Chain = Op.getOperand(0);
|
|
|
|
SDValue Size = Op.getOperand(1);
|
2009-02-18 06:15:04 +08:00
|
|
|
DebugLoc dl = Op.getDebugLoc();
|
|
|
|
|
|
2006-11-17 06:43:37 +08:00
|
|
|
// Get the correct type for pointers.
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
2006-11-17 06:43:37 +08:00
|
|
|
// Negate the size.
|
2009-02-07 05:50:26 +08:00
|
|
|
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
|
2006-11-17 06:43:37 +08:00
|
|
|
DAG.getConstant(0, PtrVT), Size);
|
|
|
|
// Construct a node for the frame pointer save index.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
|
2006-11-17 06:43:37 +08:00
|
|
|
// Build a DYNALLOC node.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
|
2009-08-12 04:47:22 +08:00
|
|
|
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
|
2006-11-17 06:43:37 +08:00
|
|
|
}
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
|
|
|
|
/// possible.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
|
2006-04-14 14:01:58 +08:00
|
|
|
// Not FP? Not a fsel.
|
2008-06-06 20:08:01 +08:00
|
|
|
if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
|
|
|
|
!Op.getOperand(2).getValueType().isFloatingPoint())
|
2009-05-28 12:31:08 +08:00
|
|
|
return Op;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Cannot handle SETEQ/SETNE.
|
2009-05-28 12:31:08 +08:00
|
|
|
if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT ResVT = Op.getValueType();
|
|
|
|
EVT CmpVT = Op.getOperand(0).getValueType();
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
|
|
|
|
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
|
2009-02-07 05:50:26 +08:00
|
|
|
DebugLoc dl = Op.getDebugLoc();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// If the RHS of the comparison is a 0.0, we don't need to do the
|
|
|
|
// subtraction at all.
|
|
|
|
if (isFloatingPointZero(RHS))
|
|
|
|
switch (CC) {
|
|
|
|
default: break; // SETUO etc aren't handled by fsel.
|
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETLT:
|
|
|
|
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
|
2006-05-24 08:06:44 +08:00
|
|
|
case ISD::SETOGE:
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETGE:
|
2009-08-12 04:47:22 +08:00
|
|
|
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETGT:
|
|
|
|
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
|
2006-05-24 08:06:44 +08:00
|
|
|
case ISD::SETOLE:
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETLE:
|
2009-08-12 04:47:22 +08:00
|
|
|
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
|
2009-08-12 04:47:22 +08:00
|
|
|
DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Cmp;
|
2006-04-14 14:01:58 +08:00
|
|
|
switch (CC) {
|
|
|
|
default: break; // SETUO etc aren't handled by fsel.
|
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETLT:
|
2009-02-07 05:50:26 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
|
2006-05-24 08:06:44 +08:00
|
|
|
case ISD::SETOGE:
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETGE:
|
2009-02-07 05:50:26 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETGT:
|
2009-02-07 05:50:26 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
|
2006-05-24 08:06:44 +08:00
|
|
|
case ISD::SETOLE:
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETLE:
|
2009-02-07 05:50:26 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
|
2009-02-07 05:50:26 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
2009-05-28 12:31:08 +08:00
|
|
|
return Op;
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
|
|
|
|
2007-11-29 02:44:47 +08:00
|
|
|
// FIXME: Split this code up when LegalizeDAGTypes lands.
|
2009-06-05 04:53:52 +08:00
|
|
|
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
|
2009-02-06 06:07:54 +08:00
|
|
|
DebugLoc dl) {
|
2008-06-06 20:08:01 +08:00
|
|
|
assert(Op.getOperand(0).getValueType().isFloatingPoint());
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Src = Op.getOperand(0);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Src.getValueType() == MVT::f32)
|
|
|
|
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
|
2008-07-20 00:26:02 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Tmp;
|
2009-08-12 04:47:22 +08:00
|
|
|
switch (Op.getValueType().getSimpleVT().SimpleTy) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i32:
|
2009-06-05 04:53:52 +08:00
|
|
|
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
|
|
|
|
PPCISD::FCTIDZ,
|
2009-08-12 04:47:22 +08:00
|
|
|
dl, MVT::f64, Src);
|
2006-04-14 14:01:58 +08:00
|
|
|
break;
|
2009-08-12 04:47:22 +08:00
|
|
|
case MVT::i64:
|
|
|
|
Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
|
2006-04-14 14:01:58 +08:00
|
|
|
break;
|
|
|
|
}
|
2008-07-20 00:26:02 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Convert the FP value to an int value through memory.
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
|
2008-07-20 00:26:02 +08:00
|
|
|
|
2007-10-16 04:14:52 +08:00
|
|
|
// Emit a store to the stack slot.
|
2009-02-05 04:06:27 +08:00
|
|
|
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);
|
2007-10-16 04:14:52 +08:00
|
|
|
|
|
|
|
// Result is a load from the stack slot. If loading 4 bytes, make sure to
|
|
|
|
// add in a bias.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Op.getValueType() == MVT::i32)
|
2009-02-05 04:06:27 +08:00
|
|
|
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
|
2007-10-16 04:14:52 +08:00
|
|
|
DAG.getConstant(4, FIPtr.getValueType()));
|
2009-02-05 04:06:27 +08:00
|
|
|
return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerSINT_TO_FP - Custom lower a signed i32/i64 to f32/f64 conversion using
/// FCFID.  An i64 source is bit-converted to f64 and fed to FCFID directly.
/// An i32 source is sign extended, stored as a full 64-bit value to a stack
/// slot, reloaded as an f64 and then converted.  Returns a null SDValue for
/// any other result type so that the default handling is used.
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
                               MVT::f64, Op.getOperand(0));
    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
                              Op.getOperand(0));

  // STD the extended value into the stack slot.
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
                            MachineMemOperand::MOStore, 0, 8, 8);
  SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
  // Pass the real operand count: Ops holds exactly chain, value and address.
  // A larger count would make the DAG read past the end of the array.
  SDValue Store =
    DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
                            Ops, array_lengthof(Ops), MVT::i64, MMO);
  // Load the value as a double.
  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);

  // FCFID it and return it.
  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  DebugLoc dl = Op.getDebugLoc();
  MachineFunction &MF = DAG.getMachineFunction();
  EVT ResultVT = Op.getValueType();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Read the FP control word into an FP register.  The MFFS node takes no
  // operands; NoFlag only provides an address for the (empty) operand list.
  std::vector<EVT> MFFSTys;
  MFFSTys.push_back(MVT::f64);      // return register
  MFFSTys.push_back(MVT::Flag);     // unused in this context
  SDValue NoFlag;
  SDValue FPSCRVal = DAG.getNode(PPCISD::MFFS, dl, MFFSTys, &NoFlag, 0);

  // Spill the FP register to an 8-byte stack slot.
  int SlotFI = MF.getFrameInfo()->CreateStackObject(8, 8);
  SDValue Slot = DAG.getFrameIndex(SlotFI, PtrVT);
  SDValue Spill = DAG.getStore(DAG.getEntryNode(), dl, FPSCRVal,
                               Slot, NULL, 0);

  // Reload the control word from the low 32 bits of the slot (offset 4).
  SDValue LowWordAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Slot,
                                    DAG.getConstant(4, PtrVT));
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Spill, LowWordAddr, NULL, 0);

  // (FPSCR & 3)
  SDValue ModeBits = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 CWD, DAG.getConstant(3, MVT::i32));

  // ((~FPSCR & 3) >> 1), with the inversion done as an XOR with 3.
  SDValue Flipped = DAG.getNode(ISD::XOR, dl, MVT::i32,
                                CWD, DAG.getConstant(3, MVT::i32));
  SDValue FlippedBits = DAG.getNode(ISD::AND, dl, MVT::i32,
                                    Flipped, DAG.getConstant(3, MVT::i32));
  SDValue Adjust = DAG.getNode(ISD::SRL, dl, MVT::i32,
                               FlippedBits, DAG.getConstant(1, MVT::i32));

  SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, ModeBits, Adjust);

  // Convert the i32 result to the requested type.
  // NOTE(review): RetVal is i32 but the threshold below is 16 bits, so a
  // 16-bit result type would request a ZERO_EXTEND - confirm the intent.
  unsigned CastOpc = ISD::ZERO_EXTEND;
  if (ResultVT.getSizeInBits() < 16)
    CastOpc = ISD::TRUNCATE;
  return DAG.getNode(CastOpc, dl, ResultVT, RetVal);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerSHL_PARTS - Expand a double-width shift left, given as (Lo, Hi, Amt),
/// into PPC shifts on the two halves and merge the resulting (Lo, Hi) pair.
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // BitWidth - Amt: shift that moves the top bits of Lo into Hi.
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue HiShifted = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue LoCarry = DAG.getNode(PPCISD::SRL, dl, VT, Lo, InvAmt);
  SDValue HiSmall = DAG.getNode(ISD::OR , dl, VT, HiShifted, LoCarry);

  // Amt - BitWidth: contribution of Lo to Hi when Amt >= BitWidth.
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                                  DAG.getConstant(-BitWidth, AmtVT));
  SDValue HiLarge = DAG.getNode(PPCISD::SHL, dl, VT, Lo, ExcessAmt);

  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, HiSmall, HiLarge);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);

  SDValue Parts[] = { OutLo, OutHi };
  return DAG.getMergeValues(Parts, 2, dl);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerSRL_PARTS - Expand a double-width logical shift right, given as
/// (Lo, Hi, Amt), into PPC shifts on the two halves and merge the resulting
/// (Lo, Hi) pair.
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // BitWidth - Amt: shift that moves the low bits of Hi into Lo.
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, VT, Hi, InvAmt);
  SDValue LoSmall = DAG.getNode(ISD::OR, dl, VT, LoShifted, HiCarry);

  // Amt - BitWidth: contribution of Hi to Lo when Amt >= BitWidth.
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                                  DAG.getConstant(-BitWidth, AmtVT));
  SDValue LoLarge = DAG.getNode(PPCISD::SRL, dl, VT, Hi, ExcessAmt);

  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, LoSmall, LoLarge);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);

  SDValue Parts[] = { OutLo, OutHi };
  return DAG.getMergeValues(Parts, 2, dl);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerSRA_PARTS - Expand a double-width arithmetic shift right, given as
/// (Lo, Hi, Amt), into PPC shifts on the two halves plus a select_cc that
/// chooses the low half, and merge the resulting (Lo, Hi) pair.
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  // BitWidth - Amt: shift that moves the low bits of Hi into Lo.
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, VT, Hi, InvAmt);
  SDValue LoSmall = DAG.getNode(ISD::OR, dl, VT, LoShifted, HiCarry);

  // Amt - BitWidth: low half when the shift amount exceeds BitWidth.
  SDValue ExcessAmt = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                                  DAG.getConstant(-BitWidth, AmtVT));
  SDValue LoLarge = DAG.getNode(PPCISD::SRA, dl, VT, Hi, ExcessAmt);

  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);

  // Use the small-shift form while Amt - BitWidth <= 0, else the large one.
  SDValue Zero = DAG.getConstant(0, AmtVT);
  SDValue OutLo = DAG.getSelectCC(dl, ExcessAmt, Zero,
                                  LoSmall, LoLarge, ISD::SETLE);

  SDValue Parts[] = { OutLo, OutHi };
  return DAG.getMergeValues(Parts, 2, dl);
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Vector related lowering.
|
|
|
|
//
|
|
|
|
|
2006-04-17 14:00:21 +08:00
|
|
|
/// BuildSplatI - Build a canonical splati of Val with an element size of
|
|
|
|
/// SplatSize. Cast the result to VT.
|
2009-08-11 06:56:29 +08:00
|
|
|
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
|
2009-02-06 09:31:28 +08:00
|
|
|
SelectionDAG &DAG, DebugLoc dl) {
|
2006-04-17 14:00:21 +08:00
|
|
|
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
|
2006-12-01 09:45:39 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
static const EVT VTys[] = { // canonical VT to use for each size.
|
2009-08-12 04:47:22 +08:00
|
|
|
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
|
2006-04-17 14:00:21 +08:00
|
|
|
};
|
2006-12-01 09:45:39 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-12-01 09:45:39 +08:00
|
|
|
// Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
|
|
|
|
if (Val == -1)
|
|
|
|
SplatSize = 1;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT CanonicalVT = VTys[SplatSize-1];
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 14:00:21 +08:00
|
|
|
// Build a canonical splat for this value.
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Elt = DAG.getConstant(Val, MVT::i32);
|
2008-07-28 05:46:04 +08:00
|
|
|
SmallVector<SDValue, 8> Ops;
|
2008-06-06 20:08:01 +08:00
|
|
|
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
|
2009-02-26 06:49:59 +08:00
|
|
|
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
|
|
|
|
&Ops[0], Ops.size());
|
2009-02-06 09:31:28 +08:00
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
|
2006-04-17 14:00:21 +08:00
|
|
|
}
|
|
|
|
|
2006-04-18 11:24:30 +08:00
|
|
|
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
|
2006-04-17 14:58:41 +08:00
|
|
|
/// specified intrinsic ID.
|
2008-07-28 05:46:04 +08:00
|
|
|
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
|
2009-02-06 09:31:28 +08:00
|
|
|
SelectionDAG &DAG, DebugLoc dl,
|
2009-08-12 04:47:22 +08:00
|
|
|
EVT DestVT = MVT::Other) {
|
|
|
|
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
|
2009-02-06 09:31:28 +08:00
|
|
|
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
|
2009-08-12 04:47:22 +08:00
|
|
|
DAG.getConstant(IID, MVT::i32), LHS, RHS);
|
2006-04-17 14:58:41 +08:00
|
|
|
}
|
|
|
|
|
2006-04-18 11:24:30 +08:00
|
|
|
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
|
|
|
|
/// specified intrinsic ID.
|
2008-07-28 05:46:04 +08:00
|
|
|
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
|
2009-02-06 09:31:28 +08:00
|
|
|
SDValue Op2, SelectionDAG &DAG,
|
2009-08-12 04:47:22 +08:00
|
|
|
DebugLoc dl, EVT DestVT = MVT::Other) {
|
|
|
|
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
|
2009-02-06 09:31:28 +08:00
|
|
|
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
|
2009-08-12 04:47:22 +08:00
|
|
|
DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
|
2006-04-18 11:24:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-04-18 01:55:10 +08:00
|
|
|
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
|
|
|
|
/// amount. The result has the specified value type.
|
2008-07-28 05:46:04 +08:00
|
|
|
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT, SelectionDAG &DAG, DebugLoc dl) {
|
2006-04-18 01:55:10 +08:00
|
|
|
// Force LHS/RHS to be the right type.
|
2009-08-12 04:47:22 +08:00
|
|
|
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
|
|
|
|
RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
|
2008-07-21 18:20:31 +08:00
|
|
|
|
2009-04-28 02:41:29 +08:00
|
|
|
int Ops[16];
|
2006-04-18 01:55:10 +08:00
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
2009-04-28 02:41:29 +08:00
|
|
|
Ops[i] = i + Amt;
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
|
2009-02-06 09:31:28 +08:00
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
|
2006-04-18 01:55:10 +08:00
|
|
|
}
|
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
// If this is a case we can't handle, return null and let the default
|
|
|
|
// expansion code take care of it. If we CAN select this case, and if it
|
|
|
|
// selects to a single instruction, return Op. Otherwise, if we can codegen
|
|
|
|
// this case more efficiently than a constant pool load, lower it to the
|
|
|
|
// sequence of ops that should be used.
|
2009-03-01 09:13:55 +08:00
|
|
|
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
2009-02-06 09:31:28 +08:00
|
|
|
DebugLoc dl = Op.getDebugLoc();
|
2009-03-01 09:13:55 +08:00
|
|
|
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
|
|
|
|
assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
|
2009-02-25 11:12:50 +08:00
|
|
|
|
2009-03-03 07:24:16 +08:00
|
|
|
// Check if this is a splat of a constant value.
|
|
|
|
APInt APSplatBits, APSplatUndef;
|
|
|
|
unsigned SplatBitSize;
|
2009-03-01 09:13:55 +08:00
|
|
|
bool HasAnyUndefs;
|
2009-03-04 03:26:27 +08:00
|
|
|
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
|
|
|
|
HasAnyUndefs) || SplatBitSize > 32)
|
|
|
|
return SDValue();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
unsigned SplatBits = APSplatBits.getZExtValue();
|
|
|
|
unsigned SplatUndef = APSplatUndef.getZExtValue();
|
|
|
|
unsigned SplatSize = SplatBitSize / 8;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// First, handle single instruction cases.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// All zeros?
|
|
|
|
if (SplatBits == 0) {
|
|
|
|
// Canonicalize all zero vectors to be v4i32.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
|
|
|
|
SDValue Z = DAG.getConstant(0, MVT::i32);
|
|
|
|
Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
|
2009-03-04 03:26:27 +08:00
|
|
|
Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
|
2006-04-17 14:00:21 +08:00
|
|
|
}
|
2009-03-04 03:26:27 +08:00
|
|
|
return Op;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
|
|
|
|
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
|
|
|
|
(32-SplatBitSize));
|
|
|
|
if (SextVal >= -16 && SextVal <= 15)
|
|
|
|
return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// Two instruction sequences.
|
2006-04-17 14:58:41 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// If this value is in the range [-32,30] and is even, use:
|
|
|
|
// tmp = VSPLTI[bhw], result = add tmp, tmp
|
|
|
|
if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
|
|
|
|
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
|
|
|
|
// for fneg/fabs.
|
|
|
|
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
|
|
|
|
// Make -1 and vspltisw -1:
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// Make the VSLW intrinsic, computing 0x8000_0000.
|
|
|
|
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
|
|
|
|
OnesV, DAG, dl);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// xor by OnesV to invert it.
|
2009-08-12 04:47:22 +08:00
|
|
|
Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
|
2009-03-04 03:26:27 +08:00
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// Check to see if this is a wide variety of vsplti*, binop self cases.
|
|
|
|
static const signed char SplatCsts[] = {
|
|
|
|
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
|
|
|
|
-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
|
|
|
|
};
|
2006-04-18 01:55:10 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
|
|
|
|
// Indirect through the SplatCsts array so that we favor 'vsplti -1' for
|
|
|
|
// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
|
|
|
|
int i = SplatCsts[idx];
|
|
|
|
|
|
|
|
// Figure out what shift amount will be used by altivec if shifted by i in
|
|
|
|
// this splat size.
|
|
|
|
unsigned TypeShiftAmt = i & (SplatBitSize-1);
|
|
|
|
|
|
|
|
// vsplti + shl self.
|
|
|
|
if (SextVal == (i << (int)TypeShiftAmt)) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
|
|
|
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
|
|
|
|
Intrinsic::ppc_altivec_vslw
|
|
|
|
};
|
|
|
|
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
|
|
|
|
}
|
|
|
|
|
|
|
|
// vsplti + srl self.
|
|
|
|
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
|
|
|
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
|
|
|
|
Intrinsic::ppc_altivec_vsrw
|
|
|
|
};
|
|
|
|
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
|
|
|
|
}
|
|
|
|
|
|
|
|
// vsplti + sra self.
|
|
|
|
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
|
|
|
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
|
|
|
|
Intrinsic::ppc_altivec_vsraw
|
|
|
|
};
|
|
|
|
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
|
2006-04-17 14:58:41 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// vsplti + rol self.
|
|
|
|
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
|
|
|
|
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
static const unsigned IIDs[] = { // Intrinsic to use for each size.
|
|
|
|
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
|
|
|
|
Intrinsic::ppc_altivec_vrlw
|
|
|
|
};
|
|
|
|
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// t = vsplti c, result = vsldoi t, t, 1
|
|
|
|
if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
|
|
|
|
}
|
|
|
|
// t = vsplti c, result = vsldoi t, t, 2
|
|
|
|
if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
|
2006-04-18 02:09:22 +08:00
|
|
|
}
|
2009-03-04 03:26:27 +08:00
|
|
|
// t = vsplti c, result = vsldoi t, t, 3
|
|
|
|
if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
|
2006-04-17 14:58:41 +08:00
|
|
|
}
|
2006-04-14 13:19:18 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-03-04 03:26:27 +08:00
|
|
|
// Three instruction sequences.
|
|
|
|
|
|
|
|
// Odd, in range [17,31]: (vsplti C)-(vsplti -16).
|
|
|
|
if (SextVal >= 0 && SextVal <= 31) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
|
|
|
|
SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
|
|
|
|
}
|
|
|
|
// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
|
|
|
|
if (SextVal >= -31 && SextVal <= 0) {
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
|
|
|
|
SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
|
2009-03-04 03:26:27 +08:00
|
|
|
LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
|
|
|
|
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2006-04-14 13:19:18 +08:00
|
|
|
}
|
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
|
|
|
|
/// the specified operations to build the shuffle.
|
2008-07-28 05:46:04 +08:00
|
|
|
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
|
2009-02-18 06:15:04 +08:00
|
|
|
SDValue RHS, SelectionDAG &DAG,
|
2009-02-06 09:31:28 +08:00
|
|
|
DebugLoc dl) {
|
2006-04-17 13:28:54 +08:00
|
|
|
unsigned OpNum = (PFEntry >> 26) & 0x0F;
|
2008-09-17 08:30:57 +08:00
|
|
|
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
|
2006-04-17 13:28:54 +08:00
|
|
|
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
enum {
|
2006-05-16 12:20:24 +08:00
|
|
|
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
|
2006-04-17 13:28:54 +08:00
|
|
|
OP_VMRGHW,
|
|
|
|
OP_VMRGLW,
|
|
|
|
OP_VSPLTISW0,
|
|
|
|
OP_VSPLTISW1,
|
|
|
|
OP_VSPLTISW2,
|
|
|
|
OP_VSPLTISW3,
|
|
|
|
OP_VSLDOI4,
|
|
|
|
OP_VSLDOI8,
|
2006-05-25 01:04:05 +08:00
|
|
|
OP_VSLDOI12
|
2006-04-17 13:28:54 +08:00
|
|
|
};
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
if (OpNum == OP_COPY) {
|
|
|
|
if (LHSID == (1*9+2)*9+3) return LHS;
|
|
|
|
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
|
|
|
|
return RHS;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue OpLHS, OpRHS;
|
2009-02-06 09:31:28 +08:00
|
|
|
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
|
|
|
|
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-04-28 02:41:29 +08:00
|
|
|
int ShufIdxs[16];
|
2006-04-17 13:28:54 +08:00
|
|
|
switch (OpNum) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unknown i32 permute!");
|
2006-04-17 13:28:54 +08:00
|
|
|
case OP_VMRGHW:
|
|
|
|
ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
|
|
|
|
ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
|
|
|
|
ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
|
|
|
|
ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
|
|
|
|
break;
|
|
|
|
case OP_VMRGLW:
|
|
|
|
ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
|
|
|
|
ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
|
|
|
|
ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
|
|
|
|
ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
|
|
|
|
break;
|
|
|
|
case OP_VSPLTISW0:
|
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
|
|
|
ShufIdxs[i] = (i&3)+0;
|
|
|
|
break;
|
|
|
|
case OP_VSPLTISW1:
|
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
|
|
|
ShufIdxs[i] = (i&3)+4;
|
|
|
|
break;
|
|
|
|
case OP_VSPLTISW2:
|
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
|
|
|
ShufIdxs[i] = (i&3)+8;
|
|
|
|
break;
|
|
|
|
case OP_VSPLTISW3:
|
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
|
|
|
ShufIdxs[i] = (i&3)+12;
|
|
|
|
break;
|
|
|
|
case OP_VSLDOI4:
|
2009-02-06 09:31:28 +08:00
|
|
|
return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
|
2006-04-17 13:28:54 +08:00
|
|
|
case OP_VSLDOI8:
|
2009-02-06 09:31:28 +08:00
|
|
|
return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
|
2006-04-17 13:28:54 +08:00
|
|
|
case OP_VSLDOI12:
|
2009-02-06 09:31:28 +08:00
|
|
|
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
|
2006-04-17 13:28:54 +08:00
|
|
|
}
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = OpLHS.getValueType();
|
2009-08-12 04:47:22 +08:00
|
|
|
OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
|
|
|
|
OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
|
|
|
|
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
|
2009-04-28 02:41:29 +08:00
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
|
2006-04-17 13:28:54 +08:00
|
|
|
}
|
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
|
|
|
|
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
|
|
|
|
/// return the code it can be lowered into. Worst case, it can always be
|
|
|
|
/// lowered into a vperm.
|
2009-02-18 06:15:04 +08:00
|
|
|
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
2009-04-28 02:41:29 +08:00
|
|
|
SelectionDAG &DAG) {
|
2009-02-06 09:31:28 +08:00
|
|
|
DebugLoc dl = Op.getDebugLoc();
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue V1 = Op.getOperand(0);
|
|
|
|
SDValue V2 = Op.getOperand(1);
|
2009-04-28 02:41:29 +08:00
|
|
|
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = Op.getValueType();
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
// Cases that are handled by instructions that take permute immediates
|
|
|
|
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
|
|
|
|
// selected by the instruction selector.
|
|
|
|
if (V2.getOpcode() == ISD::UNDEF) {
|
2009-04-28 02:41:29 +08:00
|
|
|
if (PPC::isSplatShuffleMask(SVOp, 1) ||
|
|
|
|
PPC::isSplatShuffleMask(SVOp, 2) ||
|
|
|
|
PPC::isSplatShuffleMask(SVOp, 4) ||
|
|
|
|
PPC::isVPKUWUMShuffleMask(SVOp, true) ||
|
|
|
|
PPC::isVPKUHUMShuffleMask(SVOp, true) ||
|
|
|
|
PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
|
|
|
|
PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
|
|
|
|
PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
|
|
|
|
PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
|
|
|
|
PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
|
|
|
|
PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
|
|
|
|
PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
|
2006-04-14 13:19:18 +08:00
|
|
|
return Op;
|
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
// Altivec has a variety of "shuffle immediates" that take two vector inputs
|
|
|
|
// and produce a fixed permutation. If any of these match, do not lower to
|
|
|
|
// VPERM.
|
2009-04-28 02:41:29 +08:00
|
|
|
if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
|
|
|
|
PPC::isVPKUHUMShuffleMask(SVOp, false) ||
|
|
|
|
PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
|
|
|
|
PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
|
|
|
|
PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
|
|
|
|
PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
|
|
|
|
PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
|
|
|
|
PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
|
|
|
|
PPC::isVMRGHShuffleMask(SVOp, 4, false))
|
2006-04-14 13:19:18 +08:00
|
|
|
return Op;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
|
|
|
|
// perfect shuffle table to emit an optimal matching sequence.
|
2009-04-28 02:41:29 +08:00
|
|
|
SmallVector<int, 16> PermMask;
|
|
|
|
SVOp->getMask(PermMask);
|
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
unsigned PFIndexes[4];
|
|
|
|
bool isFourElementShuffle = true;
|
|
|
|
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
|
|
|
|
unsigned EltNo = 8; // Start out undef.
|
|
|
|
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
|
2009-04-28 02:41:29 +08:00
|
|
|
if (PermMask[i*4+j] < 0)
|
2006-04-17 13:28:54 +08:00
|
|
|
continue; // Undef, ignore it.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-04-28 02:41:29 +08:00
|
|
|
unsigned ByteSource = PermMask[i*4+j];
|
2006-04-17 13:28:54 +08:00
|
|
|
if ((ByteSource & 3) != j) {
|
|
|
|
isFourElementShuffle = false;
|
|
|
|
break;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
if (EltNo == 8) {
|
|
|
|
EltNo = ByteSource/4;
|
|
|
|
} else if (EltNo != ByteSource/4) {
|
|
|
|
isFourElementShuffle = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PFIndexes[i] = EltNo;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
|
|
|
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
|
2006-04-17 13:28:54 +08:00
|
|
|
// perfect shuffle vector to determine if it is cost effective to do this as
|
|
|
|
// discrete instructions, or whether we should use a vperm.
|
|
|
|
if (isFourElementShuffle) {
|
|
|
|
// Compute the index in the perfect shuffle table.
|
2009-02-18 06:15:04 +08:00
|
|
|
unsigned PFTableIndex =
|
2006-04-17 13:28:54 +08:00
|
|
|
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
|
|
|
|
unsigned Cost = (PFEntry >> 30);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-17 13:28:54 +08:00
|
|
|
// Determining when to avoid vperm is tricky. Many things affect the cost
|
|
|
|
// of vperm, particularly how many times the perm mask needs to be computed.
|
|
|
|
// For example, if the perm mask can be hoisted out of a loop or is already
|
|
|
|
// used (perhaps because there are multiple permutes with the same shuffle
|
|
|
|
// mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
|
|
|
|
// the loop requires an extra register.
|
|
|
|
//
|
|
|
|
// As a compromise, we only emit discrete instructions if the shuffle can be
|
2009-02-18 06:15:04 +08:00
|
|
|
// generated in 3 or fewer operations. When we have loop information
|
2006-04-17 13:28:54 +08:00
|
|
|
// available, if this block is within a loop, we should avoid using vperm
|
|
|
|
// for 3-operation perms and use a constant pool load instead.
|
2009-02-18 06:15:04 +08:00
|
|
|
if (Cost < 3)
|
2009-02-06 09:31:28 +08:00
|
|
|
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
|
2006-04-17 13:28:54 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
|
|
|
|
// vector that will get spilled to the constant pool.
|
|
|
|
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
|
|
|
|
// that it is in input element units, not in bytes. Convert now.
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT EltVT = V1.getValueType().getVectorElementType();
|
2008-06-06 20:08:01 +08:00
|
|
|
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
SmallVector<SDValue, 16> ResultMask;
|
2009-04-28 02:41:29 +08:00
|
|
|
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
|
|
|
|
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 13:19:18 +08:00
|
|
|
for (unsigned j = 0; j != BytesPerElement; ++j)
|
|
|
|
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
|
2009-08-12 04:47:22 +08:00
|
|
|
MVT::i32));
|
2006-04-14 13:19:18 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
|
2009-02-26 06:49:59 +08:00
|
|
|
&ResultMask[0], ResultMask.size());
|
2009-02-06 09:31:28 +08:00
|
|
|
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
|
2006-04-14 13:19:18 +08:00
|
|
|
}
|
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
|
|
|
|
/// altivec comparison. If it is, return true and fill in Opc/isDot with
|
|
|
|
/// information about the intrinsic.
|
2008-07-28 05:46:04 +08:00
|
|
|
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
bool &isDot) {
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned IntrinsicID =
|
|
|
|
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
CompareOpc = -1;
|
|
|
|
isDot = false;
|
|
|
|
switch (IntrinsicID) {
|
|
|
|
default: return false;
|
|
|
|
// Comparison predicates.
|
2006-04-14 14:01:58 +08:00
|
|
|
case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Normal Comparisons.
|
|
|
|
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
|
2005-09-07 06:03:27 +08:00
|
|
|
}
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
|
|
|
|
/// lower, do it, otherwise return null.
|
2009-02-18 06:15:04 +08:00
|
|
|
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
2008-03-05 07:17:14 +08:00
|
|
|
SelectionDAG &DAG) {
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
// If this is a lowered altivec predicate compare, CompareOpc is set to the
|
|
|
|
// opcode number of the comparison.
|
2009-02-06 06:07:54 +08:00
|
|
|
DebugLoc dl = Op.getDebugLoc();
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
int CompareOpc;
|
|
|
|
bool isDot;
|
|
|
|
if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(); // Don't custom lower most intrinsics.
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
// If this is a non-dot comparison, make the VCMP node and we are done.
|
2006-04-14 14:01:58 +08:00
|
|
|
if (!isDot) {
|
2009-02-06 06:07:54 +08:00
|
|
|
SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
|
2006-04-14 14:01:58 +08:00
|
|
|
Op.getOperand(1), Op.getOperand(2),
|
2009-08-12 04:47:22 +08:00
|
|
|
DAG.getConstant(CompareOpc, MVT::i32));
|
2009-02-06 06:07:54 +08:00
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
|
2006-04-14 14:01:58 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Create the PPCISD altivec 'dot' comparison node.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Ops[] = {
|
2006-08-12 01:18:05 +08:00
|
|
|
Op.getOperand(2), // LHS
|
|
|
|
Op.getOperand(3), // RHS
|
2009-08-12 04:47:22 +08:00
|
|
|
DAG.getConstant(CompareOpc, MVT::i32)
|
2006-08-12 01:18:05 +08:00
|
|
|
};
|
2009-08-11 06:56:29 +08:00
|
|
|
std::vector<EVT> VTs;
|
2006-04-14 14:01:58 +08:00
|
|
|
VTs.push_back(Op.getOperand(2).getValueType());
|
2009-08-12 04:47:22 +08:00
|
|
|
VTs.push_back(MVT::Flag);
|
2009-02-06 06:07:54 +08:00
|
|
|
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Now that we have the comparison, emit a copy from the CR to a GPR.
|
|
|
|
// This is flagged to the above dot comparison.
|
2009-08-12 04:47:22 +08:00
|
|
|
SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
|
|
|
|
DAG.getRegister(PPC::CR6, MVT::i32),
|
2009-02-18 06:15:04 +08:00
|
|
|
CompNode.getValue(1));
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Unpack the result based on how the target uses it.
|
|
|
|
unsigned BitNo; // Bit # of CR6.
|
|
|
|
bool InvertBit; // Invert result?
|
2008-09-13 00:56:44 +08:00
|
|
|
switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
|
2006-04-14 14:01:58 +08:00
|
|
|
default: // Can't happen, don't crash on invalid number though.
|
|
|
|
case 0: // Return the value of the EQ bit of CR6.
|
|
|
|
BitNo = 0; InvertBit = false;
|
2006-03-24 15:53:47 +08:00
|
|
|
break;
|
2006-04-14 14:01:58 +08:00
|
|
|
case 1: // Return the inverted value of the EQ bit of CR6.
|
|
|
|
BitNo = 0; InvertBit = true;
|
2005-09-01 05:09:52 +08:00
|
|
|
break;
|
2006-04-14 14:01:58 +08:00
|
|
|
case 2: // Return the value of the LT bit of CR6.
|
|
|
|
BitNo = 2; InvertBit = false;
|
|
|
|
break;
|
|
|
|
case 3: // Return the inverted value of the LT bit of CR6.
|
|
|
|
BitNo = 2; InvertBit = true;
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
llvm-svn: 25848
2006-01-31 16:17:29 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// Shift the bit into the low position.
|
2009-08-12 04:47:22 +08:00
|
|
|
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
|
|
|
|
DAG.getConstant(8-(3-BitNo), MVT::i32));
|
2006-04-14 14:01:58 +08:00
|
|
|
// Isolate the bit.
|
2009-08-12 04:47:22 +08:00
|
|
|
Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
// If we are supposed to, toggle the bit.
|
|
|
|
if (InvertBit)
|
2009-08-12 04:47:22 +08:00
|
|
|
Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
2006-04-14 14:01:58 +08:00
|
|
|
return Flags;
|
|
|
|
}
|
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
/// LowerSCALAR_TO_VECTOR - Build the vector by going through memory: the
/// scalar operand is stored at the start of a fresh 16-byte, 16-byte-aligned
/// stack slot and the whole slot is then loaded back as the result type.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();

  // Reserve the stack slot and form its address.
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  const int SlotFI = MFI->CreateStackObject(16, 16);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue SlotAddr = DAG.getFrameIndex(SlotFI, PtrVT);

  // Write the scalar into element #0 of the slot...
  SDValue StoreChain = DAG.getStore(DAG.getEntryNode(), dl,
                                    Op.getOperand(0), SlotAddr, NULL, 0);
  // ...and read the slot back as a vector, chained after the store.
  return DAG.getLoad(Op.getValueType(), dl, StoreChain, SlotAddr, NULL, 0);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerMUL - Custom lower a vector multiply of type v4i32, v8i16 or v16i8
/// into altivec intrinsic calls.  Any other type is unexpected here.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getValueType() == MVT::v4i32) {
    SDValue A = Op.getOperand(0), B = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    // A splat of -16 is what gets passed as the rotate/shift count where a
    // count of +16 is wanted.
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);

    // BSwapped = vrlw B, 16
    SDValue BSwapped =
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, B, Neg16, DAG, dl);

    // View all three inputs as v8i16 for the halfword multiplies.
    A = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, A);
    B = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, B);
    BSwapped = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, BSwapped);

    // Low halves multiplied together, giving 32-bit results (the top halves
    // are ignored here).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      A, B, DAG, dl, MVT::v4i32);

    // High-part contribution, computed against the halfword-rotated operand
    // with a zero accumulator.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      A, BSwapped, Zero, DAG, dl, MVT::v4i32);
    // Move the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);

    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  }

  if (Op.getValueType() == MVT::v8i16) {
    SDValue A = Op.getOperand(0), B = Op.getOperand(1);

    // A single vmladduhm with a zero third operand.
    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            A, B, Zero, DAG, dl);
  }

  if (Op.getValueType() == MVT::v16i8) {
    SDValue A = Op.getOperand(0), B = Op.getOperand(1);

    // Multiply the even 8-bit elements, producing 16-bit results.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         A, B, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit elements, producing 16-bit results.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        A, B, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);

    // Merge: result byte 2k takes byte 2k+1 of EvenParts and result byte
    // 2k+1 takes byte 2k+1 of OddParts (index + 16 selects the second
    // shuffle input).
    int MergeMask[16];
    for (unsigned i = 0; i != 16; ++i)
      MergeMask[i] = (i | 1) + ((i & 1) ? 16 : 0);
    return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts,
                                MergeMask);
  }

  llvm_unreachable("Unknown mul to lower!");
}
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
/// LowerOperation - Provide custom lowering hooks for some operations.
|
|
|
|
///
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
|
2006-04-14 14:01:58 +08:00
|
|
|
switch (Op.getOpcode()) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
|
|
|
|
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
|
2007-07-12 01:19:51 +08:00
|
|
|
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
|
2006-04-23 02:53:45 +08:00
|
|
|
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
2008-09-17 08:30:57 +08:00
|
|
|
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
|
2009-02-18 06:15:04 +08:00
|
|
|
case ISD::VASTART:
|
2007-04-03 21:59:52 +08:00
|
|
|
return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
|
|
|
|
VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
|
|
|
case ISD::VAARG:
|
2007-04-03 21:59:52 +08:00
|
|
|
return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
|
|
|
|
VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
|
|
|
|
|
2006-12-05 06:04:42 +08:00
|
|
|
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
|
2007-02-25 13:34:32 +08:00
|
|
|
case ISD::DYNAMIC_STACKALLOC:
|
|
|
|
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
|
2008-04-19 09:30:48 +08:00
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
2009-06-05 04:53:52 +08:00
|
|
|
case ISD::FP_TO_UINT:
|
|
|
|
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
|
2009-02-06 06:07:54 +08:00
|
|
|
Op.getDebugLoc());
|
2006-04-14 14:01:58 +08:00
|
|
|
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
|
2008-01-31 08:41:03 +08:00
|
|
|
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
2006-04-14 14:01:58 +08:00
|
|
|
|
|
|
|
// Lower 64-bit shifts.
|
Legalize is no longer limited to cleverness with just constant shift amounts.
Allow it to be clever when possible and fall back to the gross code when needed.
This allows us to compile:
long long foo1(long long X, int C) {
return X << (C|32);
}
long long foo2(long long X, int C) {
return X << (C&~32);
}
to:
_foo1:
rlwinm r2, r5, 0, 27, 31
slw r3, r4, r2
li r4, 0
blr
.globl _foo2
.align 4
_foo2:
rlwinm r2, r5, 0, 27, 25
subfic r5, r2, 32
slw r3, r3, r2
srw r5, r4, r5
or r3, r3, r5
slw r4, r4, r2
blr
instead of:
_foo1:
ori r2, r5, 32
subfic r5, r2, 32
addi r6, r2, -32
srw r5, r4, r5
slw r3, r3, r2
slw r6, r4, r6
or r3, r3, r5
slw r4, r4, r2
or r3, r3, r6
blr
.globl _foo2
.align 4
_foo2:
rlwinm r2, r5, 0, 27, 25
subfic r5, r2, 32
addi r6, r2, -32
srw r5, r4, r5
slw r3, r3, r2
slw r6, r4, r6
or r3, r3, r5
slw r4, r4, r2
or r3, r3, r6
blr
llvm-svn: 30507
2006-09-20 11:47:40 +08:00
|
|
|
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
|
|
|
|
case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
|
|
|
|
case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
|
2006-04-14 14:01:58 +08:00
|
|
|
|
|
|
|
// Vector-related lowering.
|
|
|
|
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
|
|
|
|
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
|
|
|
|
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
|
|
|
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
|
2006-04-18 11:24:30 +08:00
|
|
|
case ISD::MUL: return LowerMUL(Op, DAG);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-12-08 14:59:59 +08:00
|
|
|
// Frame & Return address.
|
|
|
|
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
|
2007-03-01 21:11:38 +08:00
|
|
|
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
|
2005-09-01 04:23:54 +08:00
|
|
|
}
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2005-08-26 08:52:45 +08:00
|
|
|
}
|
|
|
|
|
2008-12-01 19:39:25 +08:00
|
|
|
/// ReplaceNodeResults - Custom type legalization hook.  Appends the
/// replacement value for node N to Results.
///
/// Two opcodes are handled:
///  - FP_TO_SINT is forwarded to LowerFP_TO_INT.
///  - FP_ROUND_INREG on ppcf128 is expanded into a flag-chained sequence of
///    MFFS / MTFSB1 / MTFSB0 / FADDRTZ / MTFSF nodes.
/// Any other opcode trips an assertion and leaves Results untouched.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) {
  DebugLoc Loc = N->getDebugLoc();
  const unsigned Opcode = N->getOpcode();

  if (Opcode == ISD::FP_TO_SINT) {
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, Loc));
    return;
  }

  if (Opcode != ISD::FP_ROUND_INREG) {
    assert(false && "Do not know how to custom type legalize this operation!");
    return;
  }

  assert(N->getValueType(0) == MVT::ppcf128);
  assert(N->getOperand(0).getValueType() == MVT::ppcf128);

  // Pull the two f64 halves out of the ppcf128 operand.
  SDValue LoHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, Loc, MVT::f64,
                               N->getOperand(0), DAG.getIntPtrConstant(0));
  SDValue HiHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, Loc, MVT::f64,
                               N->getOperand(0), DAG.getIntPtrConstant(1));

  // The nodes below switch FPSCR to round-to-zero, add the two halves of the
  // long double, and then restore FPSCR.  FPSCR itself is not modeled, so the
  // nodes are kept in order by threading a flag value from one to the next.

  // Result type lists used by the nodes below.
  std::vector<EVT> F64AndFlagTys;   // a value plus a flag for later insns
  F64AndFlagTys.push_back(MVT::f64);
  F64AndFlagTys.push_back(MVT::Flag);
  std::vector<EVT> FlagOnlyTys;     // just a flag
  FlagOnlyTys.push_back(MVT::Flag);
  std::vector<EVT> F64OnlyTys;      // just a value
  F64OnlyTys.push_back(MVT::f64);

  // MFFS takes no operands; Glue is still a null SDValue at this point.
  SDValue Glue;
  SDValue ReadFPSCR = DAG.getNode(PPCISD::MFFS, Loc, F64AndFlagTys, &Glue, 0);
  SDValue SavedFPSCR = ReadFPSCR.getValue(0);
  Glue = ReadFPSCR.getValue(1);

  // MTFSB1 with operand 31.
  SDValue SetBitOps[] = { DAG.getConstant(31, MVT::i32), Glue };
  Glue = DAG.getNode(PPCISD::MTFSB1, Loc, FlagOnlyTys, SetBitOps, 2)
           .getValue(0);

  // MTFSB0 with operand 30.
  SDValue ClearBitOps[] = { DAG.getConstant(30, MVT::i32), Glue };
  Glue = DAG.getNode(PPCISD::MTFSB0, Loc, FlagOnlyTys, ClearBitOps, 2)
           .getValue(0);

  // Add the two halves.
  SDValue AddOps[] = { LoHalf, HiHalf, Glue };
  SDValue Sum = DAG.getNode(PPCISD::FADDRTZ, Loc, F64AndFlagTys, AddOps, 3);
  SDValue Rounded = Sum.getValue(0);
  Glue = Sum.getValue(1);

  // MTFSF is fed the value MFFS produced earlier, and its result stands in
  // for the sum from here on.
  SDValue RestoreOps[] = {
    DAG.getConstant(1, MVT::i32), SavedFPSCR, Rounded, Glue
  };
  Rounded = DAG.getNode(PPCISD::MTFSF, Loc, F64OnlyTys, RestoreOps, 4)
              .getValue(0);

  // We know the low half is about to be thrown away, so just use something
  // convenient.
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::ppcf128,
                                Rounded, Rounded));
}
|
|
|
|
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Other Lowering Code
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2008-08-26 06:34:37 +08:00
|
|
|
/// EmitAtomicBinary - Expand a word (is64bit == false) or doubleword
/// (is64bit == true) atomic read-modify-write pseudo MI into a
/// load-reserve / store-conditional retry loop.
///
/// BinOpcode is the machine opcode that combines the loaded value with the
/// operand; BinOpcode == 0 means ATOMIC_SWAP, where the operand is stored
/// unchanged.  Two new blocks (loop and exit) are inserted after BB, BB's
/// successors move to the exit block, and the exit block is returned.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    bool is64bit, unsigned BinOpcode) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = BB->getParent();
  const BasicBlock *IRBlock = BB->getBasicBlock();
  MachineFunction::iterator InsertPos = BB;
  ++InsertPos;

  // Operands of the pseudo: result, the two address registers, the value.
  const unsigned DestReg    = MI->getOperand(0).getReg();
  const unsigned AddrAReg   = MI->getOperand(1).getReg();
  const unsigned AddrBReg   = MI->getOperand(2).getReg();
  const unsigned OperandReg = MI->getOperand(3).getReg();
  DebugLoc Loc = MI->getDebugLoc();

  // Width-dependent opcodes.
  const unsigned LoadReserveOpc = is64bit ? PPC::LDARX : PPC::LWARX;
  const unsigned StoreCondOpc   = is64bit ? PPC::STDCX : PPC::STWCX;

  MachineBasicBlock *LoopBlock = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *ExitBlock = MF->CreateMachineBasicBlock(IRBlock);
  MF->insert(InsertPos, LoopBlock);
  MF->insert(InsertPos, ExitBlock);
  ExitBlock->transferSuccessors(BB);

  // A swap stores the operand directly; every other operation needs a fresh
  // register to hold the combined value.
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned StoreValReg = OperandReg;
  if (BinOpcode) {
    const TargetRegisterClass *RC;
    if (is64bit)
      RC = &PPC::G8RCRegClass;
    else
      RC = &PPC::GPRCRegClass;
    StoreValReg = MRI.createVirtualRegister(RC);
  }

  //  thisMBB:
  //   ...
  //   fallthrough --> loop block
  BB->addSuccessor(LoopBlock);

  //  loop block:
  //   l[wd]arx  dest, ptr
  //   <binop>   tmp, dest, incr      (omitted for swap)
  //   st[wd]cx. tmp, ptr
  //   bne-      loop block
  //   fallthrough --> exit block
  BuildMI(LoopBlock, Loc, TII->get(LoadReserveOpc), DestReg)
    .addReg(AddrAReg).addReg(AddrBReg);
  if (BinOpcode)
    BuildMI(LoopBlock, Loc, TII->get(BinOpcode), StoreValReg)
      .addReg(OperandReg).addReg(DestReg);
  BuildMI(LoopBlock, Loc, TII->get(StoreCondOpc))
    .addReg(StoreValReg).addReg(AddrAReg).addReg(AddrBReg);
  BuildMI(LoopBlock, Loc, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(LoopBlock);
  LoopBlock->addSuccessor(LoopBlock);
  LoopBlock->addSuccessor(ExitBlock);

  //  exit block:
  //   ...
  return ExitBlock;
}
|
|
|
|
|
2008-08-29 01:53:09 +08:00
|
|
|
MachineBasicBlock *
|
2009-02-18 06:15:04 +08:00
|
|
|
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
|
2008-08-29 01:53:09 +08:00
|
|
|
MachineBasicBlock *BB,
|
|
|
|
bool is8bit, // operation
|
2009-02-08 00:15:20 +08:00
|
|
|
unsigned BinOpcode) const {
|
2008-08-30 02:29:46 +08:00
|
|
|
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
|
2008-08-29 01:53:09 +08:00
|
|
|
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
|
|
|
// In 64 bit mode we have to use 64 bits for addresses, even though the
|
|
|
|
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
|
|
|
|
// registers without caring whether they're 32 or 64, but here we're
|
|
|
|
// doing actual arithmetic on the addresses.
|
|
|
|
bool is64bit = PPCSubTarget.isPPC64();
|
|
|
|
|
|
|
|
const BasicBlock *LLVM_BB = BB->getBasicBlock();
|
|
|
|
MachineFunction *F = BB->getParent();
|
|
|
|
MachineFunction::iterator It = BB;
|
|
|
|
++It;
|
|
|
|
|
|
|
|
unsigned dest = MI->getOperand(0).getReg();
|
|
|
|
unsigned ptrA = MI->getOperand(1).getReg();
|
|
|
|
unsigned ptrB = MI->getOperand(2).getReg();
|
|
|
|
unsigned incr = MI->getOperand(3).getReg();
|
2009-02-13 10:27:39 +08:00
|
|
|
DebugLoc dl = MI->getDebugLoc();
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
F->insert(It, loopMBB);
|
|
|
|
F->insert(It, exitMBB);
|
|
|
|
exitMBB->transferSuccessors(BB);
|
|
|
|
|
|
|
|
MachineRegisterInfo &RegInfo = F->getRegInfo();
|
2009-02-18 06:15:04 +08:00
|
|
|
const TargetRegisterClass *RC =
|
2008-09-03 04:30:23 +08:00
|
|
|
is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
|
|
|
|
(const TargetRegisterClass *) &PPC::GPRCRegClass;
|
2008-08-29 01:53:09 +08:00
|
|
|
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
|
2008-08-30 02:29:46 +08:00
|
|
|
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
|
2008-08-29 01:53:09 +08:00
|
|
|
unsigned Ptr1Reg;
|
2008-08-30 02:29:46 +08:00
|
|
|
unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
// thisMBB:
|
|
|
|
// ...
|
|
|
|
// fallthrough --> loopMBB
|
|
|
|
BB->addSuccessor(loopMBB);
|
|
|
|
|
|
|
|
// The 4-byte load must be aligned, while a char or short may be
|
|
|
|
// anywhere in the word. Hence all this nasty bookkeeping code.
|
|
|
|
// add ptr1, ptrA, ptrB [copy if ptrA==0]
|
|
|
|
// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
|
2008-09-03 04:30:23 +08:00
|
|
|
// xori shift, shift1, 24 [16]
|
2008-08-29 01:53:09 +08:00
|
|
|
// rlwinm ptr, ptr1, 0, 0, 29
|
|
|
|
// slw incr2, incr, shift
|
|
|
|
// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
|
|
|
|
// slw mask, mask2, shift
|
|
|
|
// loopMBB:
|
2008-08-30 08:08:53 +08:00
|
|
|
// lwarx tmpDest, ptr
|
2008-08-30 02:29:46 +08:00
|
|
|
// add tmp, tmpDest, incr2
|
|
|
|
// andc tmp2, tmpDest, mask
|
2008-08-29 01:53:09 +08:00
|
|
|
// and tmp3, tmp, mask
|
|
|
|
// or tmp4, tmp3, tmp2
|
2008-08-30 08:08:53 +08:00
|
|
|
// stwcx. tmp4, ptr
|
2008-08-29 01:53:09 +08:00
|
|
|
// bne- loopMBB
|
|
|
|
// fallthrough --> exitMBB
|
2008-08-30 02:29:46 +08:00
|
|
|
// srw dest, tmpDest, shift
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
if (ptrA!=PPC::R0) {
|
|
|
|
Ptr1Reg = RegInfo.createVirtualRegister(RC);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(ptrA).addReg(ptrB);
|
|
|
|
} else {
|
|
|
|
Ptr1Reg = ptrB;
|
|
|
|
}
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
|
|
|
|
if (is64bit)
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(Ptr1Reg).addImm(0).addImm(61);
|
|
|
|
else
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(incr).addReg(ShiftReg);
|
|
|
|
if (is8bit)
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
|
2008-08-29 01:53:09 +08:00
|
|
|
else {
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
|
|
|
|
BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
|
2008-08-29 01:53:09 +08:00
|
|
|
}
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(Mask2Reg).addReg(ShiftReg);
|
|
|
|
|
|
|
|
BB = loopMBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(PPC::R0).addReg(PtrReg);
|
2008-08-30 02:29:46 +08:00
|
|
|
if (BinOpcode)
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
|
2008-08-30 02:29:46 +08:00
|
|
|
.addReg(Incr2Reg).addReg(TmpDestReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
|
2008-08-30 02:29:46 +08:00
|
|
|
.addReg(TmpDestReg).addReg(MaskReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(TmpReg).addReg(MaskReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(Tmp3Reg).addReg(Tmp2Reg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::STWCX))
|
2008-08-29 01:53:09 +08:00
|
|
|
.addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::BCC))
|
2009-02-18 06:15:04 +08:00
|
|
|
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
|
2008-08-29 01:53:09 +08:00
|
|
|
BB->addSuccessor(loopMBB);
|
|
|
|
BB->addSuccessor(exitMBB);
|
|
|
|
|
|
|
|
// exitMBB:
|
|
|
|
// ...
|
|
|
|
BB = exitMBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
|
2008-08-29 01:53:09 +08:00
|
|
|
return BB;
|
|
|
|
}
|
|
|
|
|
2005-08-27 05:23:58 +08:00
|
|
|
MachineBasicBlock *
|
2008-01-31 02:18:23 +08:00
|
|
|
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
2009-09-19 05:02:19 +08:00
|
|
|
MachineBasicBlock *BB,
|
|
|
|
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
|
2006-11-28 07:37:22 +08:00
|
|
|
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
2008-07-12 10:23:19 +08:00
|
|
|
|
|
|
|
// To "insert" these instructions we actually have to insert their
|
|
|
|
// control-flow patterns.
|
2005-08-27 05:23:58 +08:00
|
|
|
const BasicBlock *LLVM_BB = BB->getBasicBlock();
|
2008-07-08 07:14:23 +08:00
|
|
|
MachineFunction::iterator It = BB;
|
2005-08-27 05:23:58 +08:00
|
|
|
++It;
|
2008-07-12 10:23:19 +08:00
|
|
|
|
2008-07-08 07:14:23 +08:00
|
|
|
MachineFunction *F = BB->getParent();
|
2008-07-12 10:23:19 +08:00
|
|
|
|
|
|
|
if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
|
|
|
|
MI->getOpcode() == PPC::SELECT_CC_I8 ||
|
|
|
|
MI->getOpcode() == PPC::SELECT_CC_F4 ||
|
|
|
|
MI->getOpcode() == PPC::SELECT_CC_F8 ||
|
|
|
|
MI->getOpcode() == PPC::SELECT_CC_VRRC) {
|
|
|
|
|
|
|
|
// The incoming instruction knows the destination vreg to set, the
|
|
|
|
// condition code register to branch on, the true/false values to
|
|
|
|
// select between, and a branch opcode to use.
|
|
|
|
|
|
|
|
// thisMBB:
|
|
|
|
// ...
|
|
|
|
// TrueVal = ...
|
|
|
|
// cmpTY ccX, r1, r2
|
|
|
|
// bCC copy1MBB
|
|
|
|
// fallthrough --> copy0MBB
|
|
|
|
MachineBasicBlock *thisMBB = BB;
|
|
|
|
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
unsigned SelectPred = MI->getOperand(4).getImm();
|
2009-02-13 10:27:39 +08:00
|
|
|
DebugLoc dl = MI->getDebugLoc();
|
|
|
|
BuildMI(BB, dl, TII->get(PPC::BCC))
|
2008-07-12 10:23:19 +08:00
|
|
|
.addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
|
|
|
|
F->insert(It, copy0MBB);
|
|
|
|
F->insert(It, sinkMBB);
|
2009-09-19 17:51:03 +08:00
|
|
|
// Update machine-CFG edges by first adding all successors of the current
|
2008-07-12 10:23:19 +08:00
|
|
|
// block to the new block which will contain the Phi node for the select.
|
2009-09-19 17:51:03 +08:00
|
|
|
// Also inform sdisel of the edge changes.
|
|
|
|
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
|
|
|
|
E = BB->succ_end(); I != E; ++I) {
|
|
|
|
EM->insert(std::make_pair(*I, sinkMBB));
|
|
|
|
sinkMBB->addSuccessor(*I);
|
|
|
|
}
|
|
|
|
// Next, remove all successors of the current block, and add the true
|
|
|
|
// and fallthrough blocks as its successors.
|
|
|
|
while (!BB->succ_empty())
|
|
|
|
BB->removeSuccessor(BB->succ_begin());
|
2008-07-12 10:23:19 +08:00
|
|
|
// Next, add the true and fallthrough blocks as its successors.
|
|
|
|
BB->addSuccessor(copy0MBB);
|
|
|
|
BB->addSuccessor(sinkMBB);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-12 10:23:19 +08:00
|
|
|
// copy0MBB:
|
|
|
|
// %FalseValue = ...
|
|
|
|
// # fallthrough to sinkMBB
|
|
|
|
BB = copy0MBB;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-12 10:23:19 +08:00
|
|
|
// Update machine-CFG edges
|
|
|
|
BB->addSuccessor(sinkMBB);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-12 10:23:19 +08:00
|
|
|
// sinkMBB:
|
|
|
|
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
|
|
|
|
// ...
|
|
|
|
BB = sinkMBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
|
2008-07-12 10:23:19 +08:00
|
|
|
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
|
|
|
|
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
|
|
|
|
}
|
2008-08-29 01:53:09 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
|
2008-09-11 10:15:03 +08:00
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
|
2008-08-29 01:53:09 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
|
2008-09-11 10:15:03 +08:00
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
|
2008-09-11 10:15:03 +08:00
|
|
|
BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
|
2008-09-11 10:15:03 +08:00
|
|
|
BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
|
2008-08-26 06:34:37 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
|
2008-08-29 01:53:09 +08:00
|
|
|
|
2008-08-30 02:29:46 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
|
|
|
|
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, false, 0);
|
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
|
|
|
|
BB = EmitAtomicBinary(MI, BB, true, 0);
|
|
|
|
|
2008-07-12 10:23:19 +08:00
|
|
|
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
|
|
|
|
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
|
|
|
|
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
|
|
|
|
|
|
|
|
unsigned dest = MI->getOperand(0).getReg();
|
|
|
|
unsigned ptrA = MI->getOperand(1).getReg();
|
|
|
|
unsigned ptrB = MI->getOperand(2).getReg();
|
|
|
|
unsigned oldval = MI->getOperand(3).getReg();
|
|
|
|
unsigned newval = MI->getOperand(4).getReg();
|
2009-02-13 10:27:39 +08:00
|
|
|
DebugLoc dl = MI->getDebugLoc();
|
2008-07-12 10:23:19 +08:00
|
|
|
|
2008-08-26 02:53:26 +08:00
|
|
|
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
2008-07-12 10:23:19 +08:00
|
|
|
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
2008-08-26 02:53:26 +08:00
|
|
|
F->insert(It, loop1MBB);
|
|
|
|
F->insert(It, loop2MBB);
|
|
|
|
F->insert(It, midMBB);
|
2008-07-12 10:23:19 +08:00
|
|
|
F->insert(It, exitMBB);
|
|
|
|
exitMBB->transferSuccessors(BB);
|
|
|
|
|
|
|
|
// thisMBB:
|
|
|
|
// ...
|
|
|
|
// fallthrough --> loopMBB
|
2008-08-26 02:53:26 +08:00
|
|
|
BB->addSuccessor(loop1MBB);
|
2008-07-12 10:23:19 +08:00
|
|
|
|
2008-08-26 02:53:26 +08:00
|
|
|
// loop1MBB:
|
2008-07-12 10:23:19 +08:00
|
|
|
// l[wd]arx dest, ptr
|
2008-08-26 02:53:26 +08:00
|
|
|
// cmp[wd] dest, oldval
|
|
|
|
// bne- midMBB
|
|
|
|
// loop2MBB:
|
2008-07-12 10:23:19 +08:00
|
|
|
// st[wd]cx. newval, ptr
|
|
|
|
// bne- loopMBB
|
2008-08-26 02:53:26 +08:00
|
|
|
// b exitBB
|
|
|
|
// midMBB:
|
|
|
|
// st[wd]cx. dest, ptr
|
|
|
|
// exitBB:
|
|
|
|
BB = loop1MBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
|
2008-07-12 10:23:19 +08:00
|
|
|
.addReg(ptrA).addReg(ptrB);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
|
2008-07-12 10:23:19 +08:00
|
|
|
.addReg(oldval).addReg(dest);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::BCC))
|
2008-08-26 02:53:26 +08:00
|
|
|
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
|
|
|
|
BB->addSuccessor(loop2MBB);
|
|
|
|
BB->addSuccessor(midMBB);
|
|
|
|
|
|
|
|
BB = loop2MBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
|
2008-07-12 10:23:19 +08:00
|
|
|
.addReg(newval).addReg(ptrA).addReg(ptrB);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::BCC))
|
2008-08-26 02:53:26 +08:00
|
|
|
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
|
2008-08-26 02:53:26 +08:00
|
|
|
BB->addSuccessor(loop1MBB);
|
2008-07-12 10:23:19 +08:00
|
|
|
BB->addSuccessor(exitMBB);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-26 02:53:26 +08:00
|
|
|
BB = midMBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
|
2008-08-26 02:53:26 +08:00
|
|
|
.addReg(dest).addReg(ptrA).addReg(ptrB);
|
|
|
|
BB->addSuccessor(exitMBB);
|
|
|
|
|
2008-07-12 10:23:19 +08:00
|
|
|
// exitMBB:
|
|
|
|
// ...
|
|
|
|
BB = exitMBB;
|
2008-08-30 08:08:53 +08:00
|
|
|
} else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
|
|
|
|
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
|
|
|
|
// We must use 64-bit registers for addresses when targeting 64-bit,
|
|
|
|
// since we're actually doing arithmetic on them. Other registers
|
|
|
|
// can be 32-bit.
|
|
|
|
bool is64bit = PPCSubTarget.isPPC64();
|
|
|
|
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
|
|
|
|
|
|
|
|
unsigned dest = MI->getOperand(0).getReg();
|
|
|
|
unsigned ptrA = MI->getOperand(1).getReg();
|
|
|
|
unsigned ptrB = MI->getOperand(2).getReg();
|
|
|
|
unsigned oldval = MI->getOperand(3).getReg();
|
|
|
|
unsigned newval = MI->getOperand(4).getReg();
|
2009-02-13 10:27:39 +08:00
|
|
|
DebugLoc dl = MI->getDebugLoc();
|
2008-08-30 08:08:53 +08:00
|
|
|
|
|
|
|
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
|
|
|
F->insert(It, loop1MBB);
|
|
|
|
F->insert(It, loop2MBB);
|
|
|
|
F->insert(It, midMBB);
|
|
|
|
F->insert(It, exitMBB);
|
|
|
|
exitMBB->transferSuccessors(BB);
|
|
|
|
|
|
|
|
MachineRegisterInfo &RegInfo = F->getRegInfo();
|
2009-02-18 06:15:04 +08:00
|
|
|
const TargetRegisterClass *RC =
|
2008-09-03 04:30:23 +08:00
|
|
|
is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
|
|
|
|
(const TargetRegisterClass *) &PPC::GPRCRegClass;
|
2008-08-30 08:08:53 +08:00
|
|
|
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
unsigned Ptr1Reg;
|
|
|
|
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
|
|
|
|
// thisMBB:
|
|
|
|
// ...
|
|
|
|
// fallthrough --> loop1MBB
|
|
|
|
BB->addSuccessor(loop1MBB);
|
|
|
|
|
|
|
|
// The 4-byte load must be aligned, while a char or short may be
|
|
|
|
// anywhere in the word. Hence all this nasty bookkeeping code.
|
|
|
|
// add ptr1, ptrA, ptrB [copy if ptrA==0]
|
|
|
|
// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
|
2008-09-03 04:30:23 +08:00
|
|
|
// xori shift, shift1, 24 [16]
|
2008-08-30 08:08:53 +08:00
|
|
|
// rlwinm ptr, ptr1, 0, 0, 29
|
|
|
|
// slw newval2, newval, shift
|
|
|
|
// slw oldval2, oldval,shift
|
|
|
|
// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
|
|
|
|
// slw mask, mask2, shift
|
|
|
|
// and newval3, newval2, mask
|
|
|
|
// and oldval3, oldval2, mask
|
|
|
|
// loop1MBB:
|
|
|
|
// lwarx tmpDest, ptr
|
|
|
|
// and tmp, tmpDest, mask
|
|
|
|
// cmpw tmp, oldval3
|
|
|
|
// bne- midMBB
|
|
|
|
// loop2MBB:
|
|
|
|
// andc tmp2, tmpDest, mask
|
|
|
|
// or tmp4, tmp2, newval3
|
|
|
|
// stwcx. tmp4, ptr
|
|
|
|
// bne- loop1MBB
|
|
|
|
// b exitMBB
|
|
|
|
// midMBB:
|
|
|
|
// stwcx. tmpDest, ptr
|
|
|
|
// exitMBB:
|
|
|
|
// srw dest, tmp, shift
|
|
|
|
if (ptrA!=PPC::R0) {
|
|
|
|
Ptr1Reg = RegInfo.createVirtualRegister(RC);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(ptrA).addReg(ptrB);
|
|
|
|
} else {
|
|
|
|
Ptr1Reg = ptrB;
|
|
|
|
}
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
|
|
|
|
if (is64bit)
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(Ptr1Reg).addImm(0).addImm(61);
|
|
|
|
else
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(newval).addReg(ShiftReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(oldval).addReg(ShiftReg);
|
|
|
|
if (is8bit)
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
|
2008-08-30 08:08:53 +08:00
|
|
|
else {
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
|
|
|
|
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
|
|
|
|
.addReg(Mask3Reg).addImm(65535);
|
2008-08-30 08:08:53 +08:00
|
|
|
}
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(Mask2Reg).addReg(ShiftReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(NewVal2Reg).addReg(MaskReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(OldVal2Reg).addReg(MaskReg);
|
|
|
|
|
|
|
|
BB = loop1MBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(PPC::R0).addReg(PtrReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
|
|
|
|
.addReg(TmpDestReg).addReg(MaskReg);
|
|
|
|
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(TmpReg).addReg(OldVal3Reg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::BCC))
|
2008-08-30 08:08:53 +08:00
|
|
|
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
|
|
|
|
BB->addSuccessor(loop2MBB);
|
|
|
|
BB->addSuccessor(midMBB);
|
|
|
|
|
|
|
|
BB = loop2MBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
|
|
|
|
.addReg(TmpDestReg).addReg(MaskReg);
|
|
|
|
BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
|
|
|
|
.addReg(Tmp2Reg).addReg(NewVal3Reg);
|
|
|
|
BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(PPC::R0).addReg(PtrReg);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::BCC))
|
2008-08-30 08:08:53 +08:00
|
|
|
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
|
2008-08-30 08:08:53 +08:00
|
|
|
BB->addSuccessor(loop1MBB);
|
|
|
|
BB->addSuccessor(exitMBB);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-30 08:08:53 +08:00
|
|
|
BB = midMBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
|
2008-08-30 08:08:53 +08:00
|
|
|
.addReg(PPC::R0).addReg(PtrReg);
|
|
|
|
BB->addSuccessor(exitMBB);
|
|
|
|
|
|
|
|
// exitMBB:
|
|
|
|
// ...
|
|
|
|
BB = exitMBB;
|
2009-02-13 10:27:39 +08:00
|
|
|
BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
|
2008-08-30 08:08:53 +08:00
|
|
|
} else {
|
2009-07-15 00:55:14 +08:00
|
|
|
llvm_unreachable("Unexpected instr type to insert");
|
2008-07-12 10:23:19 +08:00
|
|
|
}
|
2005-08-27 05:23:58 +08:00
|
|
|
|
2008-07-08 07:14:23 +08:00
|
|
|
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
|
2005-08-27 05:23:58 +08:00
|
|
|
return BB;
|
|
|
|
}
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Target Optimization Hooks
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2008-11-24 22:53:14 +08:00
|
|
|
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|
|
|
DAGCombinerInfo &DCI) const {
|
2006-03-01 12:57:39 +08:00
|
|
|
TargetMachine &TM = getTargetMachine();
|
|
|
|
SelectionDAG &DAG = DCI.DAG;
|
2009-02-06 06:07:54 +08:00
|
|
|
DebugLoc dl = N->getDebugLoc();
|
2006-03-01 12:57:39 +08:00
|
|
|
switch (N->getOpcode()) {
|
|
|
|
default: break;
|
Fold the PPCISD shifts when presented with 0 inputs. This occurs for code
like:
long long test(long long X, int Y) {
return 1ULL << Y;
}
long long test2(long long X, int Y) {
return -1LL << Y;
}
which we used to compile to:
_test:
li r2, 1
subfic r3, r5, 32
li r4, 0
addi r6, r5, -32
srw r3, r2, r3
slw r4, r4, r5
slw r6, r2, r6
or r3, r4, r3
slw r4, r2, r5
or r3, r3, r6
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
Now we produce:
_test:
li r2, 1
addi r3, r5, -32
subfic r4, r5, 32
slw r3, r2, r3
srw r4, r2, r4
or r3, r4, r3
slw r4, r2, r5
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
llvm-svn: 30479
2006-09-19 13:22:59 +08:00
|
|
|
case PPCISD::SHL:
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
|
2008-09-13 00:56:44 +08:00
|
|
|
if (C->getZExtValue() == 0) // 0 << V -> 0.
|
Fold the PPCISD shifts when presented with 0 inputs. This occurs for code
like:
long long test(long long X, int Y) {
return 1ULL << Y;
}
long long test2(long long X, int Y) {
return -1LL << Y;
}
which we used to compile to:
_test:
li r2, 1
subfic r3, r5, 32
li r4, 0
addi r6, r5, -32
srw r3, r2, r3
slw r4, r4, r5
slw r6, r2, r6
or r3, r4, r3
slw r4, r2, r5
or r3, r3, r6
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
Now we produce:
_test:
li r2, 1
addi r3, r5, -32
subfic r4, r5, 32
slw r3, r2, r3
srw r4, r2, r4
or r3, r4, r3
slw r4, r2, r5
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
llvm-svn: 30479
2006-09-19 13:22:59 +08:00
|
|
|
return N->getOperand(0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PPCISD::SRL:
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
|
2008-09-13 00:56:44 +08:00
|
|
|
if (C->getZExtValue() == 0) // 0 >>u V -> 0.
|
Fold the PPCISD shifts when presented with 0 inputs. This occurs for code
like:
long long test(long long X, int Y) {
return 1ULL << Y;
}
long long test2(long long X, int Y) {
return -1LL << Y;
}
which we used to compile to:
_test:
li r2, 1
subfic r3, r5, 32
li r4, 0
addi r6, r5, -32
srw r3, r2, r3
slw r4, r4, r5
slw r6, r2, r6
or r3, r4, r3
slw r4, r2, r5
or r3, r3, r6
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
Now we produce:
_test:
li r2, 1
addi r3, r5, -32
subfic r4, r5, 32
slw r3, r2, r3
srw r4, r2, r4
or r3, r4, r3
slw r4, r2, r5
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
llvm-svn: 30479
2006-09-19 13:22:59 +08:00
|
|
|
return N->getOperand(0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PPCISD::SRA:
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
|
2008-09-13 00:56:44 +08:00
|
|
|
if (C->getZExtValue() == 0 || // 0 >>s V -> 0.
|
Fold the PPCISD shifts when presented with 0 inputs. This occurs for code
like:
long long test(long long X, int Y) {
return 1ULL << Y;
}
long long test2(long long X, int Y) {
return -1LL << Y;
}
which we used to compile to:
_test:
li r2, 1
subfic r3, r5, 32
li r4, 0
addi r6, r5, -32
srw r3, r2, r3
slw r4, r4, r5
slw r6, r2, r6
or r3, r4, r3
slw r4, r2, r5
or r3, r3, r6
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
Now we produce:
_test:
li r2, 1
addi r3, r5, -32
subfic r4, r5, 32
slw r3, r2, r3
srw r4, r2, r4
or r3, r4, r3
slw r4, r2, r5
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
llvm-svn: 30479
2006-09-19 13:22:59 +08:00
|
|
|
C->isAllOnesValue()) // -1 >>s V -> -1.
|
|
|
|
return N->getOperand(0);
|
|
|
|
}
|
|
|
|
break;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-01 12:57:39 +08:00
|
|
|
case ISD::SINT_TO_FP:
|
2006-06-17 01:34:12 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
|
|
|
|
// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
|
|
|
|
// We allow the src/dst to be either f32/f64, but the intermediate
|
|
|
|
// type must be i64.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (N->getOperand(0).getValueType() == MVT::i64 &&
|
|
|
|
N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Val = N->getOperand(0).getOperand(0);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Val.getValueType() == MVT::f32) {
|
|
|
|
Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
2009-08-12 04:47:22 +08:00
|
|
|
Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
2009-08-12 04:47:22 +08:00
|
|
|
if (N->getValueType(0) == MVT::f32) {
|
|
|
|
Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
|
2008-01-17 15:00:52 +08:00
|
|
|
DAG.getIntPtrConstant(0));
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
}
|
|
|
|
return Val;
|
2009-08-12 04:47:22 +08:00
|
|
|
} else if (N->getOperand(0).getValueType() == MVT::i32) {
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
// If the intermediate type is i32, we can avoid the load/store here
|
|
|
|
// too.
|
2006-03-01 12:57:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2006-03-01 13:50:56 +08:00
|
|
|
case ISD::STORE:
|
|
|
|
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
|
|
|
|
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
|
2008-01-19 00:54:56 +08:00
|
|
|
!cast<StoreSDNode>(N)->isTruncatingStore() &&
|
2006-03-01 13:50:56 +08:00
|
|
|
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
|
2009-08-12 04:47:22 +08:00
|
|
|
N->getOperand(1).getValueType() == MVT::i32 &&
|
|
|
|
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Val = N->getOperand(1).getOperand(0);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (Val.getValueType() == MVT::f32) {
|
|
|
|
Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
2006-03-01 13:50:56 +08:00
|
|
|
}
|
2009-08-12 04:47:22 +08:00
|
|
|
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
2006-03-01 13:50:56 +08:00
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
|
2006-03-01 13:50:56 +08:00
|
|
|
N->getOperand(2), N->getOperand(3));
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.AddToWorklist(Val.getNode());
|
2006-03-01 13:50:56 +08:00
|
|
|
return Val;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-07-11 04:56:58 +08:00
|
|
|
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
|
2009-09-25 08:57:30 +08:00
|
|
|
if (cast<StoreSDNode>(N)->isUnindexed() &&
|
|
|
|
N->getOperand(1).getOpcode() == ISD::BSWAP &&
|
2008-08-29 05:40:38 +08:00
|
|
|
N->getOperand(1).getNode()->hasOneUse() &&
|
2009-08-12 04:47:22 +08:00
|
|
|
(N->getOperand(1).getValueType() == MVT::i32 ||
|
|
|
|
N->getOperand(1).getValueType() == MVT::i16)) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue BSwapOp = N->getOperand(1).getOperand(0);
|
2006-07-11 04:56:58 +08:00
|
|
|
// Do an any-extend to 32-bits if this is a half-word input.
|
2009-08-12 04:47:22 +08:00
|
|
|
if (BSwapOp.getValueType() == MVT::i16)
|
|
|
|
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
|
2006-07-11 04:56:58 +08:00
|
|
|
|
2009-09-26 04:36:54 +08:00
|
|
|
SDValue Ops[] = {
|
|
|
|
N->getOperand(0), BSwapOp, N->getOperand(2),
|
|
|
|
DAG.getValueType(N->getOperand(1).getValueType())
|
|
|
|
};
|
|
|
|
return
|
|
|
|
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
|
|
|
|
Ops, array_lengthof(Ops),
|
|
|
|
cast<StoreSDNode>(N)->getMemoryVT(),
|
|
|
|
cast<StoreSDNode>(N)->getMemOperand());
|
2006-07-11 04:56:58 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ISD::BSWAP:
|
|
|
|
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
|
2008-08-29 05:40:38 +08:00
|
|
|
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
|
2006-07-11 04:56:58 +08:00
|
|
|
N->getOperand(0).hasOneUse() &&
|
2009-08-12 04:47:22 +08:00
|
|
|
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Load = N->getOperand(0);
|
2006-10-10 04:57:25 +08:00
|
|
|
LoadSDNode *LD = cast<LoadSDNode>(Load);
|
2006-07-11 04:56:58 +08:00
|
|
|
// Create the byte-swapping load.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Ops[] = {
|
2006-10-10 04:57:25 +08:00
|
|
|
LD->getChain(), // Chain
|
|
|
|
LD->getBasePtr(), // Ptr
|
2006-08-12 01:18:05 +08:00
|
|
|
DAG.getValueType(N->getValueType(0)) // VT
|
|
|
|
};
|
2009-09-26 04:36:54 +08:00
|
|
|
SDValue BSLoad =
|
|
|
|
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
|
|
|
|
DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
|
|
|
|
LD->getMemoryVT(), LD->getMemOperand());
|
2006-07-11 04:56:58 +08:00
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
// If this is an i16 load, insert the truncate.
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue ResVal = BSLoad;
|
2009-08-12 04:47:22 +08:00
|
|
|
if (N->getValueType(0) == MVT::i16)
|
|
|
|
ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-07-11 04:56:58 +08:00
|
|
|
// First, combine the bswap away. This makes the value produced by the
|
|
|
|
// load dead.
|
|
|
|
DCI.CombineTo(N, ResVal);
|
|
|
|
|
|
|
|
// Next, combine the load away, we give it a bogus result value but a real
|
|
|
|
// chain result. The result value is dead because the bswap is dead.
|
2008-08-29 05:40:38 +08:00
|
|
|
DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-07-11 04:56:58 +08:00
|
|
|
// Return N so it doesn't get rechecked!
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(N, 0);
|
2006-07-11 04:56:58 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-03-01 13:50:56 +08:00
|
|
|
break;
|
Implement an item from the readme, folding vcmp/vcmp. instructions with
identical instructions into a single instruction. For example, for:
void test(vector float *x, vector float *y, int *P) {
int v = vec_any_out(*x, *y);
*x = (vector float)vec_cmpb(*x, *y);
*P = v;
}
we now generate:
_test:
mfspr r2, 256
oris r6, r2, 49152
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v0, v1, v0
mfcr r4, 2
stvx v0, 0, r3
rlwinm r3, r4, 27, 31, 31
xori r3, r3, 1
stw r3, 0(r5)
mtspr 256, r2
blr
instead of:
_test:
mfspr r2, 256
oris r6, r2, 57344
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v2, v1, v0
mfcr r4, 2
*** vcmpbfp v0, v1, v0
rlwinm r4, r4, 27, 31, 31
stvx v0, 0, r3
xori r3, r4, 1
stw r3, 0(r5)
mtspr 256, r2
blr
Testcase here: CodeGen/PowerPC/vcmp-fold.ll
llvm-svn: 27290
2006-03-31 14:02:07 +08:00
|
|
|
case PPCISD::VCMP: {
|
|
|
|
// If a VCMPo node already exists with exactly the same operands as this
|
|
|
|
// node, use its result instead of this node (VCMPo computes both a CR6 and
|
|
|
|
// a normal output).
|
|
|
|
//
|
|
|
|
if (!N->getOperand(0).hasOneUse() &&
|
|
|
|
!N->getOperand(1).hasOneUse() &&
|
|
|
|
!N->getOperand(2).hasOneUse()) {
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an item from the readme, folding vcmp/vcmp. instructions with
identical instructions into a single instruction. For example, for:
void test(vector float *x, vector float *y, int *P) {
int v = vec_any_out(*x, *y);
*x = (vector float)vec_cmpb(*x, *y);
*P = v;
}
we now generate:
_test:
mfspr r2, 256
oris r6, r2, 49152
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v0, v1, v0
mfcr r4, 2
stvx v0, 0, r3
rlwinm r3, r4, 27, 31, 31
xori r3, r3, 1
stw r3, 0(r5)
mtspr 256, r2
blr
instead of:
_test:
mfspr r2, 256
oris r6, r2, 57344
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v2, v1, v0
mfcr r4, 2
*** vcmpbfp v0, v1, v0
rlwinm r4, r4, 27, 31, 31
stvx v0, 0, r3
xori r3, r4, 1
stw r3, 0(r5)
mtspr 256, r2
blr
Testcase here: CodeGen/PowerPC/vcmp-fold.ll
llvm-svn: 27290
2006-03-31 14:02:07 +08:00
|
|
|
// Scan all of the users of the LHS, looking for VCMPo's that match.
|
|
|
|
SDNode *VCMPoNode = 0;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-29 05:40:38 +08:00
|
|
|
SDNode *LHSN = N->getOperand(0).getNode();
|
Implement an item from the readme, folding vcmp/vcmp. instructions with
identical instructions into a single instruction. For example, for:
void test(vector float *x, vector float *y, int *P) {
int v = vec_any_out(*x, *y);
*x = (vector float)vec_cmpb(*x, *y);
*P = v;
}
we now generate:
_test:
mfspr r2, 256
oris r6, r2, 49152
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v0, v1, v0
mfcr r4, 2
stvx v0, 0, r3
rlwinm r3, r4, 27, 31, 31
xori r3, r3, 1
stw r3, 0(r5)
mtspr 256, r2
blr
instead of:
_test:
mfspr r2, 256
oris r6, r2, 57344
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v2, v1, v0
mfcr r4, 2
*** vcmpbfp v0, v1, v0
rlwinm r4, r4, 27, 31, 31
stvx v0, 0, r3
xori r3, r4, 1
stw r3, 0(r5)
mtspr 256, r2
blr
Testcase here: CodeGen/PowerPC/vcmp-fold.ll
llvm-svn: 27290
2006-03-31 14:02:07 +08:00
|
|
|
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
|
|
|
|
UI != E; ++UI)
|
2008-07-28 04:43:25 +08:00
|
|
|
if (UI->getOpcode() == PPCISD::VCMPo &&
|
|
|
|
UI->getOperand(1) == N->getOperand(1) &&
|
|
|
|
UI->getOperand(2) == N->getOperand(2) &&
|
|
|
|
UI->getOperand(0) == N->getOperand(0)) {
|
|
|
|
VCMPoNode = *UI;
|
Implement an item from the readme, folding vcmp/vcmp. instructions with
identical instructions into a single instruction. For example, for:
void test(vector float *x, vector float *y, int *P) {
int v = vec_any_out(*x, *y);
*x = (vector float)vec_cmpb(*x, *y);
*P = v;
}
we now generate:
_test:
mfspr r2, 256
oris r6, r2, 49152
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v0, v1, v0
mfcr r4, 2
stvx v0, 0, r3
rlwinm r3, r4, 27, 31, 31
xori r3, r3, 1
stw r3, 0(r5)
mtspr 256, r2
blr
instead of:
_test:
mfspr r2, 256
oris r6, r2, 57344
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v2, v1, v0
mfcr r4, 2
*** vcmpbfp v0, v1, v0
rlwinm r4, r4, 27, 31, 31
stvx v0, 0, r3
xori r3, r4, 1
stw r3, 0(r5)
mtspr 256, r2
blr
Testcase here: CodeGen/PowerPC/vcmp-fold.ll
llvm-svn: 27290
2006-03-31 14:02:07 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-19 02:28:22 +08:00
|
|
|
// If there is no VCMPo node, or if the flag value has no uses, don't
|
|
|
|
// transform this.
|
|
|
|
if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
|
|
|
|
break;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
|
|
|
// Look at the (necessarily single) use of the flag value. If it has a
|
2006-04-19 02:28:22 +08:00
|
|
|
// chain, this transformation is more complex. Note that multiple things
|
|
|
|
// could use the value result, which we should ignore.
|
|
|
|
SDNode *FlagUser = 0;
|
2009-02-18 06:15:04 +08:00
|
|
|
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
|
2006-04-19 02:28:22 +08:00
|
|
|
FlagUser == 0; ++UI) {
|
|
|
|
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
|
2008-07-28 04:43:25 +08:00
|
|
|
SDNode *User = *UI;
|
2006-04-19 02:28:22 +08:00
|
|
|
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
|
2008-07-28 05:46:04 +08:00
|
|
|
if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
|
2006-04-19 02:28:22 +08:00
|
|
|
FlagUser = User;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-04-19 02:28:22 +08:00
|
|
|
// If the user is a MFCR instruction, we know this is safe. Otherwise we
|
|
|
|
// give up for right now.
|
|
|
|
if (FlagUser->getOpcode() == PPCISD::MFCR)
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue(VCMPoNode, 0);
|
Implement an item from the readme, folding vcmp/vcmp. instructions with
identical instructions into a single instruction. For example, for:
void test(vector float *x, vector float *y, int *P) {
int v = vec_any_out(*x, *y);
*x = (vector float)vec_cmpb(*x, *y);
*P = v;
}
we now generate:
_test:
mfspr r2, 256
oris r6, r2, 49152
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v0, v1, v0
mfcr r4, 2
stvx v0, 0, r3
rlwinm r3, r4, 27, 31, 31
xori r3, r3, 1
stw r3, 0(r5)
mtspr 256, r2
blr
instead of:
_test:
mfspr r2, 256
oris r6, r2, 57344
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v2, v1, v0
mfcr r4, 2
*** vcmpbfp v0, v1, v0
rlwinm r4, r4, 27, 31, 31
stvx v0, 0, r3
xori r3, r4, 1
stw r3, 0(r5)
mtspr 256, r2
blr
Testcase here: CodeGen/PowerPC/vcmp-fold.ll
llvm-svn: 27290
2006-03-31 14:02:07 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
case ISD::BR_CC: {
|
|
|
|
// If this is a branch on an altivec predicate comparison, lower this so
|
|
|
|
// that we don't have to do a MFCR: instead, branch directly on CR6. This
|
|
|
|
// lowering is done pre-legalize, because the legalizer lowers the predicate
|
|
|
|
// compare down to code that is difficult to reassemble.
|
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
int CompareOpc;
|
|
|
|
bool isDot;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
|
|
|
|
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
|
|
|
|
getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
|
|
|
|
assert(isDot && "Can't compare against a vector result!");
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
// If this is a comparison against something other than 0/1, then we know
|
|
|
|
// that the condition is never/always true.
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
if (Val != 0 && Val != 1) {
|
|
|
|
if (CC == ISD::SETEQ) // Cond never true, remove branch.
|
|
|
|
return N->getOperand(0);
|
|
|
|
// Always !=, turn it into an unconditional branch.
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getNode(ISD::BR, dl, MVT::Other,
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
N->getOperand(0), N->getOperand(4));
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
// Create the PPCISD altivec 'dot' comparison node.
|
2009-08-11 06:56:29 +08:00
|
|
|
std::vector<EVT> VTs;
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Ops[] = {
|
2006-08-12 01:18:05 +08:00
|
|
|
LHS.getOperand(2), // LHS of compare
|
|
|
|
LHS.getOperand(3), // RHS of compare
|
2009-08-12 04:47:22 +08:00
|
|
|
DAG.getConstant(CompareOpc, MVT::i32)
|
2006-08-12 01:18:05 +08:00
|
|
|
};
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
VTs.push_back(LHS.getOperand(2).getValueType());
|
2009-08-12 04:47:22 +08:00
|
|
|
VTs.push_back(MVT::Flag);
|
2009-02-06 06:07:54 +08:00
|
|
|
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
|
2009-02-18 06:15:04 +08:00
|
|
|
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
// Unpack the result based on how the target uses it.
|
2006-11-18 06:10:59 +08:00
|
|
|
PPC::Predicate CompOpc;
|
2008-09-13 00:56:44 +08:00
|
|
|
switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
default: // Can't happen, don't crash on invalid number though.
|
|
|
|
case 0: // Branch on the value of the EQ bit of CR6.
|
2006-11-18 06:10:59 +08:00
|
|
|
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
break;
|
|
|
|
case 1: // Branch on the inverted value of the EQ bit of CR6.
|
2006-11-18 06:10:59 +08:00
|
|
|
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
break;
|
|
|
|
case 2: // Branch on the value of the LT bit of CR6.
|
2006-11-18 06:10:59 +08:00
|
|
|
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
break;
|
|
|
|
case 3: // Branch on the inverted value of the LT bit of CR6.
|
2006-11-18 06:10:59 +08:00
|
|
|
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2009-08-12 04:47:22 +08:00
|
|
|
return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
|
|
|
|
DAG.getConstant(CompOpc, MVT::i32),
|
|
|
|
DAG.getRegister(PPC::CR6, MVT::i32),
|
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't
use a (serializing) MFCR instruction to read the CR6 register, which requires
a compare to get it back to CR's. Instead, just branch on CR6 directly. :)
For example, for:
void foo2(vector float *A, vector float *B) {
if (!vec_any_eq(*A, *B))
*B = (vector float){0,0,0,0};
}
We now generate:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
instead of:
_foo2:
mfspr r2, 256
oris r5, r2, 12288
mtspr 256, r5
lvx v2, 0, r4
lvx v3, 0, r3
vcmpeqfp. v2, v3, v2
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
cmpwi cr0, r3, 0
beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
vxor v2, v2, v2
stvx v2, 0, r4
mtspr 256, r2
blr
LBB1_2: ; UnifiedReturnBlock
mtspr 256, r2
blr
This implements CodeGen/PowerPC/vec_br_cmp.ll.
llvm-svn: 27804
2006-04-19 01:59:36 +08:00
|
|
|
N->getOperand(4), CompNode.getValue(1));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2006-03-01 12:57:39 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
return SDValue();
|
2006-03-01 12:57:39 +08:00
|
|
|
}
|
|
|
|
|
2006-04-14 14:01:58 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Inline Assembly Support
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
2008-02-14 06:28:48 +08:00
|
|
|
const APInt &Mask,
|
2009-02-18 06:15:04 +08:00
|
|
|
APInt &KnownZero,
|
2008-02-13 08:35:47 +08:00
|
|
|
APInt &KnownOne,
|
2007-06-22 22:59:07 +08:00
|
|
|
const SelectionDAG &DAG,
|
2006-04-02 14:26:07 +08:00
|
|
|
unsigned Depth) const {
|
2008-02-13 08:35:47 +08:00
|
|
|
KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
|
2006-04-02 14:26:07 +08:00
|
|
|
switch (Op.getOpcode()) {
|
|
|
|
default: break;
|
2006-07-11 04:56:58 +08:00
|
|
|
case PPCISD::LBRX: {
|
|
|
|
// lhbrx is known to have the top bits cleared out.
|
2009-09-28 07:17:47 +08:00
|
|
|
if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
|
2006-07-11 04:56:58 +08:00
|
|
|
KnownZero = 0xFFFF0000;
|
|
|
|
break;
|
|
|
|
}
|
2006-04-02 14:26:07 +08:00
|
|
|
case ISD::INTRINSIC_WO_CHAIN: {
|
2008-09-13 00:56:44 +08:00
|
|
|
switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
|
2006-04-02 14:26:07 +08:00
|
|
|
default: break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpbfp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpeqfp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequb_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequh_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequw_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgefp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtfp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsb_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsh_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsw_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtub_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuh_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuw_p:
|
|
|
|
KnownZero = ~1U; // All bits but the low one are known to be zero.
|
|
|
|
break;
|
2009-02-18 06:15:04 +08:00
|
|
|
}
|
2006-04-02 14:26:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-03-25 10:14:49 +08:00
|
|
|
/// getConstraintType - Given a constraint, return the type of
|
2006-02-08 04:16:30 +08:00
|
|
|
/// constraint it is for this target.
|
2009-02-18 06:15:04 +08:00
|
|
|
PPCTargetLowering::ConstraintType
|
2007-03-25 10:14:49 +08:00
|
|
|
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
|
|
|
|
if (Constraint.size() == 1) {
|
|
|
|
switch (Constraint[0]) {
|
|
|
|
default: break;
|
|
|
|
case 'b':
|
|
|
|
case 'r':
|
|
|
|
case 'f':
|
|
|
|
case 'v':
|
|
|
|
case 'y':
|
|
|
|
return C_RegisterClass;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return TargetLowering::getConstraintType(Constraint);
|
2006-02-08 04:16:30 +08:00
|
|
|
}
|
|
|
|
|
2009-02-18 06:15:04 +08:00
|
|
|
std::pair<unsigned, const TargetRegisterClass*>
|
2006-11-02 09:44:04 +08:00
|
|
|
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT) const {
|
2006-02-01 03:20:21 +08:00
|
|
|
if (Constraint.size() == 1) {
|
2006-11-02 09:44:04 +08:00
|
|
|
// GCC RS6000 Constraint Letters
|
|
|
|
switch (Constraint[0]) {
|
|
|
|
case 'b': // R1-R31
|
|
|
|
case 'r': // R0-R31
|
2009-08-12 04:47:22 +08:00
|
|
|
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
|
2006-11-02 09:44:04 +08:00
|
|
|
return std::make_pair(0U, PPC::G8RCRegisterClass);
|
|
|
|
return std::make_pair(0U, PPC::GPRCRegisterClass);
|
|
|
|
case 'f':
|
2009-08-12 04:47:22 +08:00
|
|
|
if (VT == MVT::f32)
|
2006-11-02 09:44:04 +08:00
|
|
|
return std::make_pair(0U, PPC::F4RCRegisterClass);
|
2009-08-12 04:47:22 +08:00
|
|
|
else if (VT == MVT::f64)
|
2006-11-02 09:44:04 +08:00
|
|
|
return std::make_pair(0U, PPC::F8RCRegisterClass);
|
|
|
|
break;
|
2009-02-18 06:15:04 +08:00
|
|
|
case 'v':
|
2006-11-02 09:44:04 +08:00
|
|
|
return std::make_pair(0U, PPC::VRRCRegisterClass);
|
|
|
|
case 'y': // crrc
|
|
|
|
return std::make_pair(0U, PPC::CRRCRegisterClass);
|
2006-02-01 03:20:21 +08:00
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-11-02 09:44:04 +08:00
|
|
|
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
|
2006-02-01 03:20:21 +08:00
|
|
|
}
|
2006-02-07 08:47:13 +08:00
|
|
|
|
2006-11-02 09:44:04 +08:00
|
|
|
|
2007-08-25 08:47:38 +08:00
|
|
|
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
|
2008-09-24 08:05:32 +08:00
|
|
|
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true
|
|
|
|
/// it means one of the asm constraint of the inline asm instruction being
|
|
|
|
/// processed is 'm'.
|
2008-07-28 05:46:04 +08:00
|
|
|
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
|
2008-09-24 08:05:32 +08:00
|
|
|
bool hasMemory,
|
2008-07-28 05:46:04 +08:00
|
|
|
std::vector<SDValue>&Ops,
|
2008-04-27 07:02:14 +08:00
|
|
|
SelectionDAG &DAG) const {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Result(0,0);
|
2006-02-07 08:47:13 +08:00
|
|
|
switch (Letter) {
|
|
|
|
default: break;
|
|
|
|
case 'I':
|
|
|
|
case 'J':
|
|
|
|
case 'K':
|
|
|
|
case 'L':
|
|
|
|
case 'M':
|
|
|
|
case 'N':
|
|
|
|
case 'O':
|
|
|
|
case 'P': {
|
2007-05-15 09:31:05 +08:00
|
|
|
ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
|
2007-08-25 08:47:38 +08:00
|
|
|
if (!CST) return; // Must be an immediate to match.
|
2008-09-13 00:56:44 +08:00
|
|
|
unsigned Value = CST->getZExtValue();
|
2006-02-07 08:47:13 +08:00
|
|
|
switch (Letter) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unknown constraint letter!");
|
2006-02-07 08:47:13 +08:00
|
|
|
case 'I': // "I" is a signed 16-bit constant.
|
2007-05-15 09:31:05 +08:00
|
|
|
if ((short)Value == (int)Value)
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2006-02-07 08:47:13 +08:00
|
|
|
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
|
|
|
|
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
|
2007-05-15 09:31:05 +08:00
|
|
|
if ((short)Value == 0)
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2006-02-07 08:47:13 +08:00
|
|
|
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
|
2007-05-15 09:31:05 +08:00
|
|
|
if ((Value >> 16) == 0)
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2006-02-07 08:47:13 +08:00
|
|
|
case 'M': // "M" is a constant that is greater than 31.
|
2007-05-15 09:31:05 +08:00
|
|
|
if (Value > 31)
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2006-02-07 08:47:13 +08:00
|
|
|
case 'N': // "N" is a positive constant that is an exact power of two.
|
2007-05-15 09:31:05 +08:00
|
|
|
if ((int)Value > 0 && isPowerOf2_32(Value))
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2009-02-18 06:15:04 +08:00
|
|
|
case 'O': // "O" is the constant zero.
|
2007-05-15 09:31:05 +08:00
|
|
|
if (Value == 0)
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2006-02-07 08:47:13 +08:00
|
|
|
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
|
2007-05-15 09:31:05 +08:00
|
|
|
if ((short)-Value == (int)-Value)
|
2007-08-25 08:47:38 +08:00
|
|
|
Result = DAG.getTargetConstant(Value, Op.getValueType());
|
2006-11-01 03:40:43 +08:00
|
|
|
break;
|
2006-02-07 08:47:13 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2008-08-29 05:40:38 +08:00
|
|
|
if (Result.getNode()) {
|
2007-08-25 08:47:38 +08:00
|
|
|
Ops.push_back(Result);
|
|
|
|
return;
|
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2006-02-07 08:47:13 +08:00
|
|
|
// Handle standard constraint letters.
|
2008-09-24 08:05:32 +08:00
|
|
|
TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
|
2006-02-07 08:47:13 +08:00
|
|
|
}
|
2006-03-14 07:20:37 +08:00
|
|
|
|
2007-03-31 07:15:24 +08:00
|
|
|
// isLegalAddressingMode - Return true if the addressing mode represented
|
|
|
|
// by AM is legal for this target, for a load/store of the specified type.
|
2009-02-18 06:15:04 +08:00
|
|
|
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
|
2007-03-31 07:15:24 +08:00
|
|
|
const Type *Ty) const {
|
|
|
|
// FIXME: PPC does not allow r+i addressing modes for vectors!
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-03-31 07:15:24 +08:00
|
|
|
// PPC allows a sign-extended 16-bit immediate field.
|
|
|
|
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
|
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-03-31 07:15:24 +08:00
|
|
|
// No global is ever allowed as a base.
|
|
|
|
if (AM.BaseGV)
|
|
|
|
return false;
|
2009-02-18 06:15:04 +08:00
|
|
|
|
|
|
|
// PPC only support r+r,
|
2007-03-31 07:15:24 +08:00
|
|
|
switch (AM.Scale) {
|
|
|
|
case 0: // "r+i" or just "i", depending on HasBaseReg.
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
|
|
|
|
return false;
|
|
|
|
// Otherwise we have r+r or r+i.
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
|
|
|
|
return false;
|
|
|
|
// Allow 2*r as r+r.
|
|
|
|
break;
|
2007-04-10 06:10:05 +08:00
|
|
|
default:
|
|
|
|
// No other scales are supported.
|
|
|
|
return false;
|
2007-03-31 07:15:24 +08:00
|
|
|
}
|
2009-02-18 06:15:04 +08:00
|
|
|
|
2007-03-31 07:15:24 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2006-03-14 07:20:37 +08:00
|
|
|
/// isLegalAddressImmediate - Return true if the integer value can be used
|
2007-03-13 07:29:01 +08:00
|
|
|
/// as the offset of the target addressing mode for load / store of the
|
|
|
|
/// given type.
|
|
|
|
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
|
2006-03-14 07:20:37 +08:00
|
|
|
// PPC allows a sign-extended 16-bit immediate field.
|
|
|
|
return (V > -(1 << 16) && V < (1 << 16)-1);
|
|
|
|
}
|
2006-08-28 09:02:49 +08:00
|
|
|
|
|
|
|
/// isLegalAddressImmediate - Global-value overload: always returns false, so
/// no global is reported as usable as an address immediate on this target.
bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  const bool GlobalIsLegalImmediate = false;
  return GlobalIsLegalImmediate;
}
|
2007-03-01 21:11:38 +08:00
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerRETURNADDR - Lower a return-address query into a load from the
/// function's return-address frame index.  Only depth 0 is supported; for any
/// other depth an empty SDValue is returned.  As a side effect the function
/// is marked as requiring the LR store.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();

  // Depths > 0 not supported yet!
  const uint64_t Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth != 0)
    return SDValue();

  PPCFunctionInfo *PPCInfo =
    DAG.getMachineFunction().getInfo<PPCFunctionInfo>();

  // The return address is simply loaded back off the stack.
  SDValue ReturnSlot = getReturnAddrFrameIndex(DAG);

  // Make sure the function really does not optimize away the store of the RA
  // to the stack.
  PPCInfo->setLRStoreRequired();

  SDValue Chain = DAG.getEntryNode();
  return DAG.getLoad(getPointerTy(), dl, Chain, ReturnSlot, NULL, 0);
}
|
|
|
|
|
2008-07-28 05:46:04 +08:00
|
|
|
/// LowerFRAMEADDR - Lower a frame-address query into a copy from the register
/// that holds the frame address.  Only depth 0 is supported; for any other
/// depth an empty SDValue is returned.
///
/// Register 31 is used when frame-pointer elimination is disabled or the
/// function has variable-sized objects, and the stack size is nonzero;
/// otherwise register 1 is used.  The X (64-bit) or R (32-bit) form is picked
/// from the target's pointer type.
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();

  // Depths > 0 not supported yet!
  const uint64_t Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth != 0)
    return SDValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const bool Is64Bit = PtrVT == MVT::i64;

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const bool WantsFramePointer =
    NoFramePointerElim || FrameInfo->hasVarSizedObjects();
  const bool UseReg31 = WantsFramePointer && FrameInfo->getStackSize() != 0;

  // Pick the register and the matching value type, then emit a single copy.
  unsigned FrameReg;
  if (Is64Bit)
    FrameReg = UseReg31 ? PPC::X31 : PPC::X1;
  else
    FrameReg = UseReg31 ? PPC::R31 : PPC::R1;
  const EVT RegVT = Is64Bit ? EVT(MVT::i64) : EVT(MVT::i32);

  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, RegVT);
}
|
2008-10-21 11:41:46 +08:00
|
|
|
|
|
|
|
bool
|
|
|
|
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
|
|
|
// The PowerPC target isn't yet aware of offsets.
|
|
|
|
return false;
|
|
|
|
}
|
2009-07-03 14:45:56 +08:00
|
|
|
|
2009-08-11 06:56:29 +08:00
|
|
|
/// getOptimalMemOpType - Return the value type to use for memory operations:
/// i64 when the subtarget is PPC64, i32 otherwise.  The size, alignment and
/// source-kind arguments are not consulted.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
                                           bool isSrcConst, bool isSrcStr,
                                           SelectionDAG &DAG) const {
  return PPCSubTarget.isPPC64() ? EVT(MVT::i64) : EVT(MVT::i32);
}
|