2005-10-18 08:28:58 +08:00
|
|
|
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
|
2005-08-17 01:14:42 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file was developed by Chris Lattner and is distributed under
|
|
|
|
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2005-10-16 13:39:50 +08:00
|
|
|
// This file implements the PPCISelLowering class.
|
2005-08-17 01:14:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-10-15 07:59:06 +08:00
|
|
|
#include "PPCISelLowering.h"
|
|
|
|
#include "PPCTargetMachine.h"
|
2006-02-01 15:19:44 +08:00
|
|
|
#include "llvm/ADT/VectorExtras.h"
|
2006-03-14 07:20:37 +08:00
|
|
|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2005-08-27 05:23:58 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2005-09-14 03:33:40 +08:00
|
|
|
#include "llvm/CodeGen/SSARegMap.h"
|
2005-08-27 01:36:52 +08:00
|
|
|
#include "llvm/Constants.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
#include "llvm/Function.h"
|
2006-03-26 18:06:40 +08:00
|
|
|
#include "llvm/Intrinsics.h"
|
2006-02-01 15:19:44 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2006-02-18 08:08:58 +08:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2005-08-17 01:14:42 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2005-10-16 13:39:50 +08:00
|
|
|
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
|
2005-08-17 01:14:42 +08:00
|
|
|
: TargetLowering(TM) {
|
|
|
|
|
|
|
|
// Fold away setcc operations if possible.
|
|
|
|
setSetCCIsExpensive();
|
2005-10-21 08:02:42 +08:00
|
|
|
setPow2DivIsCheap();
|
2005-08-17 01:14:42 +08:00
|
|
|
|
2005-09-28 06:18:25 +08:00
|
|
|
// Use _setjmp/_longjmp instead of setjmp/longjmp.
|
|
|
|
setUseUnderscoreSetJmpLongJmp(true);
|
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// Set up the register classes.
|
2005-10-18 08:28:58 +08:00
|
|
|
addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
|
|
|
|
addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
|
|
|
|
addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
|
2005-08-17 01:14:42 +08:00
|
|
|
|
2006-01-29 14:26:08 +08:00
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
|
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// PowerPC has no intrinsics for these particular operations
|
|
|
|
setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::MEMSET, MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
|
|
|
|
|
|
|
|
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
|
|
|
|
setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
|
|
|
|
setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
|
|
|
|
|
|
|
|
// PowerPC has no SREM/UREM instructions
|
|
|
|
setOperationAction(ISD::SREM, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::UREM, MVT::i32, Expand);
|
|
|
|
|
|
|
|
// We don't support sin/cos/sqrt/fmod
|
|
|
|
setOperationAction(ISD::FSIN , MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
2005-09-29 06:29:58 +08:00
|
|
|
setOperationAction(ISD::FREM , MVT::f64, Expand);
|
2005-08-17 01:14:42 +08:00
|
|
|
setOperationAction(ISD::FSIN , MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
2005-09-29 06:29:58 +08:00
|
|
|
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
2005-08-17 01:14:42 +08:00
|
|
|
|
|
|
|
// If we're enabling GP optimizations, use hardware square root
|
2005-09-03 02:33:05 +08:00
|
|
|
if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
|
2005-08-17 01:14:42 +08:00
|
|
|
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
|
|
|
|
}
|
|
|
|
|
2006-03-05 13:08:37 +08:00
|
|
|
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
|
|
|
|
2006-01-14 11:14:10 +08:00
|
|
|
// PowerPC does not have BSWAP, CTPOP or CTTZ
|
|
|
|
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
|
2005-08-17 01:14:42 +08:00
|
|
|
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
|
|
|
|
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
|
|
|
|
|
2006-01-12 05:21:00 +08:00
|
|
|
// PowerPC does not have ROTR
|
|
|
|
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
|
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
// PowerPC does not have Select
|
|
|
|
setOperationAction(ISD::SELECT, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::f64, Expand);
|
2005-08-26 08:52:45 +08:00
|
|
|
|
2005-08-27 01:36:52 +08:00
|
|
|
// PowerPC wants to turn select_cc of FP into fsel when possible.
|
|
|
|
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
|
|
|
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
llvm-svn: 25848
2006-01-31 16:17:29 +08:00
|
|
|
|
2006-02-01 15:19:44 +08:00
|
|
|
// PowerPC wants to optimize integer setcc a bit
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
llvm-svn: 25848
2006-01-31 16:17:29 +08:00
|
|
|
setOperationAction(ISD::SETCC, MVT::i32, Custom);
|
2005-09-01 03:09:57 +08:00
|
|
|
|
2006-03-17 09:40:33 +08:00
|
|
|
// PowerPC does not have BRCOND which requires SetCC
|
|
|
|
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
|
2005-08-17 01:14:42 +08:00
|
|
|
|
2005-09-01 05:09:52 +08:00
|
|
|
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
|
|
|
|
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
|
2005-09-07 06:03:27 +08:00
|
|
|
|
2005-08-17 08:40:22 +08:00
|
|
|
// PowerPC does not have [U|S]INT_TO_FP
|
|
|
|
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
|
|
|
|
|
2005-12-23 13:13:35 +08:00
|
|
|
setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
|
|
|
|
|
2005-09-10 08:21:06 +08:00
|
|
|
// PowerPC does not have truncstore for i1.
|
|
|
|
setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
|
2005-11-29 14:16:21 +08:00
|
|
|
|
2006-01-05 09:25:28 +08:00
|
|
|
// Support label based line numbers.
|
2005-11-29 14:16:21 +08:00
|
|
|
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
|
2006-01-05 09:47:43 +08:00
|
|
|
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
|
2006-01-05 09:25:28 +08:00
|
|
|
// FIXME - use subtarget debug flags
|
2006-01-05 09:47:43 +08:00
|
|
|
if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
|
2006-01-05 09:25:28 +08:00
|
|
|
setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
|
2005-09-10 08:21:06 +08:00
|
|
|
|
2005-12-10 10:36:00 +08:00
|
|
|
// We want to legalize GlobalAddress and ConstantPool nodes into the
|
|
|
|
// appropriate instructions to materialize the address.
|
2005-11-18 02:26:56 +08:00
|
|
|
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
2005-12-10 10:36:00 +08:00
|
|
|
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
|
2006-01-13 10:42:53 +08:00
|
|
|
|
2006-01-28 05:09:22 +08:00
|
|
|
// RET must be custom lowered, to meet ABI requirements
|
|
|
|
setOperationAction(ISD::RET , MVT::Other, Custom);
|
|
|
|
|
2006-01-26 02:21:52 +08:00
|
|
|
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
|
|
|
|
setOperationAction(ISD::VASTART , MVT::Other, Custom);
|
|
|
|
|
2006-01-15 17:02:48 +08:00
|
|
|
// Use the default implementation.
|
2006-01-26 02:21:52 +08:00
|
|
|
setOperationAction(ISD::VAARG , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::VAEND , MVT::Other, Expand);
|
2006-01-15 17:02:48 +08:00
|
|
|
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
|
|
|
|
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
|
2005-11-17 15:30:41 +08:00
|
|
|
|
2006-03-26 18:06:40 +08:00
|
|
|
// We want to custom lower some of our intrinsics.
|
2006-03-28 08:40:33 +08:00
|
|
|
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
2006-03-26 18:06:40 +08:00
|
|
|
|
2005-09-07 06:03:27 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
|
2005-10-18 08:28:58 +08:00
|
|
|
// They also have instructions for converting between i64 and fp.
|
2005-09-07 06:03:27 +08:00
|
|
|
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
|
2006-03-24 15:53:47 +08:00
|
|
|
|
|
|
|
// FIXME: disable this lowered code. This generates 64-bit register values,
|
|
|
|
// and we don't model the fact that the top part is clobbered by calls. We
|
|
|
|
// need to flag these together so that the value isn't live across a call.
|
|
|
|
//setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
|
|
|
|
|
2005-10-26 07:48:36 +08:00
|
|
|
// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
|
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
|
|
|
|
} else {
|
2005-11-17 15:30:41 +08:00
|
|
|
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
|
2005-10-26 07:48:36 +08:00
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
|
2005-10-18 08:56:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
|
|
|
|
// 64 bit PowerPC implementations can support i64 types directly
|
|
|
|
addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
|
2005-10-18 08:28:58 +08:00
|
|
|
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
|
|
|
|
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
|
|
|
|
} else {
|
|
|
|
// 32 bit PowerPC wants to expand i64 shifts itself.
|
|
|
|
setOperationAction(ISD::SHL, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::SRL, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::SRA, MVT::i64, Custom);
|
2005-09-07 06:03:27 +08:00
|
|
|
}
|
2006-03-01 09:11:20 +08:00
|
|
|
|
2005-11-29 16:17:20 +08:00
|
|
|
if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
|
2006-04-01 03:52:36 +08:00
|
|
|
// First set operation action for all vector types to expand. Then we
|
|
|
|
// will selectively turn on ones that can be effectively codegen'd.
|
|
|
|
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
|
|
|
|
VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
|
|
|
|
// add/sub/and/or/xor are legal for all supported vector VT's.
|
|
|
|
setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
|
|
|
|
setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
|
|
|
|
setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
|
|
|
|
setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
|
|
|
|
setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
|
|
|
|
|
2006-04-05 01:25:31 +08:00
|
|
|
// We promote all shuffles to v16i8.
|
|
|
|
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
|
|
|
|
AddPromotedToType(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
|
2006-04-01 03:52:36 +08:00
|
|
|
|
|
|
|
setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
|
2006-04-04 07:55:43 +08:00
|
|
|
|
|
|
|
setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
|
2006-04-01 03:52:36 +08:00
|
|
|
}
|
|
|
|
|
2006-04-05 01:25:31 +08:00
|
|
|
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
|
|
|
|
// with merges, splats, etc.
|
|
|
|
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
|
|
|
|
|
2005-11-29 16:17:20 +08:00
|
|
|
addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
|
2005-12-20 07:25:09 +08:00
|
|
|
addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
|
2006-03-25 15:39:07 +08:00
|
|
|
addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
|
|
|
|
addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
|
2006-01-29 14:32:58 +08:00
|
|
|
|
2006-04-01 03:52:36 +08:00
|
|
|
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
|
2006-03-20 09:53:53 +08:00
|
|
|
|
2006-03-19 14:55:52 +08:00
|
|
|
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
|
|
|
|
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
|
2006-03-24 15:48:08 +08:00
|
|
|
|
2006-04-02 08:43:36 +08:00
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
|
2006-03-24 15:48:08 +08:00
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
|
|
|
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
2005-11-29 16:17:20 +08:00
|
|
|
}
|
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
setSetCCResultContents(ZeroOrOneSetCCResult);
|
2006-01-14 01:52:03 +08:00
|
|
|
setStackPointerRegisterToSaveRestore(PPC::R1);
|
2005-08-17 01:14:42 +08:00
|
|
|
|
2006-03-01 12:57:39 +08:00
|
|
|
// We have target-specific dag combine patterns for the following nodes:
|
|
|
|
setTargetDAGCombine(ISD::SINT_TO_FP);
|
2006-03-01 13:50:56 +08:00
|
|
|
setTargetDAGCombine(ISD::STORE);
|
2006-03-01 12:57:39 +08:00
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
computeRegisterProperties();
|
|
|
|
}
|
|
|
|
|
2006-01-10 07:52:17 +08:00
|
|
|
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|
|
|
switch (Opcode) {
|
|
|
|
default: return 0;
|
|
|
|
case PPCISD::FSEL: return "PPCISD::FSEL";
|
|
|
|
case PPCISD::FCFID: return "PPCISD::FCFID";
|
|
|
|
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
|
|
|
|
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
|
2006-03-01 13:50:56 +08:00
|
|
|
case PPCISD::STFIWX: return "PPCISD::STFIWX";
|
2006-01-10 07:52:17 +08:00
|
|
|
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
|
|
|
|
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
|
2006-03-20 09:53:53 +08:00
|
|
|
case PPCISD::VPERM: return "PPCISD::VPERM";
|
2006-01-10 07:52:17 +08:00
|
|
|
case PPCISD::Hi: return "PPCISD::Hi";
|
|
|
|
case PPCISD::Lo: return "PPCISD::Lo";
|
|
|
|
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
|
|
|
case PPCISD::SRL: return "PPCISD::SRL";
|
|
|
|
case PPCISD::SRA: return "PPCISD::SRA";
|
|
|
|
case PPCISD::SHL: return "PPCISD::SHL";
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
|
|
|
|
case PPCISD::STD_32: return "PPCISD::STD_32";
|
2006-01-28 15:33:03 +08:00
|
|
|
case PPCISD::CALL: return "PPCISD::CALL";
|
2006-01-10 07:52:17 +08:00
|
|
|
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
|
2006-03-26 18:06:40 +08:00
|
|
|
case PPCISD::MFCR: return "PPCISD::MFCR";
|
2006-03-31 13:13:27 +08:00
|
|
|
case PPCISD::VCMP: return "PPCISD::VCMP";
|
2006-03-26 18:06:40 +08:00
|
|
|
case PPCISD::VCMPo: return "PPCISD::VCMPo";
|
2006-01-10 07:52:17 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-08-27 01:36:52 +08:00
|
|
|
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
|
|
|
|
static bool isFloatingPointZero(SDOperand Op) {
|
|
|
|
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
|
|
|
|
return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
|
|
|
|
else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
|
|
|
|
// Maybe this has already been legalized into the constant pool?
|
|
|
|
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
|
|
|
|
if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
|
|
|
|
return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-04-07 01:23:16 +08:00
|
|
|
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
|
|
|
|
/// true if Op is undef or if it matches the specified value.
|
|
|
|
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
|
|
|
|
return Op.getOpcode() == ISD::UNDEF ||
|
|
|
|
cast<ConstantSDNode>(Op)->getValue() == Val;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
|
|
|
|
/// VPKUHUM instruction.
|
|
|
|
bool PPC::isVPKUHUMShuffleMask(SDNode *N) {
|
2006-04-07 02:26:28 +08:00
|
|
|
for (unsigned i = 0; i != 16; ++i)
|
|
|
|
if (!isConstantOrUndef(N->getOperand(i), i*2+1))
|
|
|
|
return false;
|
|
|
|
return true;
|
2006-04-07 01:23:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
|
|
|
|
/// VPKUWUM instruction.
|
|
|
|
bool PPC::isVPKUWUMShuffleMask(SDNode *N) {
|
2006-04-07 02:26:28 +08:00
|
|
|
for (unsigned i = 0; i != 16; i += 2)
|
|
|
|
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
|
|
|
|
!isConstantOrUndef(N->getOperand(i+1), i*2+3))
|
|
|
|
return false;
|
|
|
|
return true;
|
2006-04-07 01:23:16 +08:00
|
|
|
}
|
|
|
|
|
2006-04-07 06:02:42 +08:00
|
|
|
/// isVMerge - Common function, used to match vmrg* shuffles.
|
|
|
|
///
|
|
|
|
static bool isVMerge(SDNode *N, unsigned UnitSize,
|
|
|
|
unsigned LHSStart, unsigned RHSStart) {
|
2006-04-07 05:11:54 +08:00
|
|
|
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
|
|
|
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
|
|
|
|
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
|
|
|
|
"Unsupported merge size!");
|
|
|
|
|
|
|
|
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
|
|
|
|
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
|
|
|
|
if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
|
2006-04-07 06:02:42 +08:00
|
|
|
LHSStart+j+i*UnitSize) ||
|
2006-04-07 05:11:54 +08:00
|
|
|
!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
|
2006-04-07 06:02:42 +08:00
|
|
|
RHSStart+j+i*UnitSize))
|
2006-04-07 05:11:54 +08:00
|
|
|
return false;
|
|
|
|
}
|
2006-04-07 06:02:42 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
|
|
|
|
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
|
|
|
|
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
|
|
|
|
if (!isUnary)
|
|
|
|
return isVMerge(N, UnitSize, 8, 24);
|
|
|
|
return isVMerge(N, UnitSize, 8, 8);
|
2006-04-07 05:11:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
|
|
|
|
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
|
2006-04-07 06:02:42 +08:00
|
|
|
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
|
|
|
|
if (!isUnary)
|
|
|
|
return isVMerge(N, UnitSize, 0, 16);
|
|
|
|
return isVMerge(N, UnitSize, 0, 0);
|
2006-04-07 05:11:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-04-07 02:26:28 +08:00
|
|
|
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
|
|
|
|
/// amount, otherwise return -1.
|
|
|
|
int PPC::isVSLDOIShuffleMask(SDNode *N) {
|
2006-04-07 05:11:54 +08:00
|
|
|
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
|
|
|
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
|
2006-04-07 02:26:28 +08:00
|
|
|
// Find the first non-undef value in the shuffle mask.
|
|
|
|
unsigned i;
|
|
|
|
for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
|
|
|
|
/*search*/;
|
|
|
|
|
|
|
|
if (i == 16) return -1; // all undef.
|
|
|
|
|
|
|
|
// Otherwise, check to see if the rest of the elements are consequtively
|
|
|
|
// numbered from this value.
|
|
|
|
unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
|
|
|
|
if (ShiftAmt < i) return -1;
|
|
|
|
ShiftAmt -= i;
|
|
|
|
|
|
|
|
// Check the rest of the elements to see if they are consequtive.
|
|
|
|
for (++i; i != 16; ++i)
|
|
|
|
if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return ShiftAmt;
|
|
|
|
}
|
2006-04-07 01:23:16 +08:00
|
|
|
|
2006-04-07 02:26:28 +08:00
|
|
|
/// isVSLDOIRotateShuffleMask - If this is a vsldoi rotate shuffle mask,
|
|
|
|
/// return the shift amount, otherwise return -1. Note that vlsdoi(x,x) will
|
|
|
|
/// result in the shuffle being changed to shuffle(x,undef, ...) with
|
|
|
|
/// transformed byte numbers.
|
|
|
|
int PPC::isVSLDOIRotateShuffleMask(SDNode *N) {
|
|
|
|
assert(N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
|
|
|
|
// Find the first non-undef value in the shuffle mask.
|
|
|
|
unsigned i;
|
|
|
|
for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
|
|
|
|
/*search*/;
|
|
|
|
|
|
|
|
if (i == 16) return -1; // all undef.
|
|
|
|
|
|
|
|
// Otherwise, check to see if the rest of the elements are consequtively
|
|
|
|
// numbered from this value.
|
|
|
|
unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
|
|
|
|
if (ShiftAmt < i) return -1;
|
|
|
|
ShiftAmt -= i;
|
|
|
|
|
|
|
|
// Check the rest of the elements to see if they are consequtive.
|
|
|
|
for (++i; i != 16; ++i)
|
|
|
|
if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return ShiftAmt;
|
|
|
|
}
|
2006-03-20 14:33:01 +08:00
|
|
|
|
|
|
|
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
|
|
|
|
/// specifies a splat of a single element that is suitable for input to
|
|
|
|
/// VSPLTB/VSPLTH/VSPLTW.
|
2006-04-05 01:25:31 +08:00
|
|
|
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
|
|
|
|
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
|
|
|
N->getNumOperands() == 16 &&
|
|
|
|
(EltSize == 1 || EltSize == 2 || EltSize == 4));
|
2006-03-20 14:51:10 +08:00
|
|
|
|
2006-03-20 14:37:44 +08:00
|
|
|
// This is a splat operation if each element of the permute is the same, and
|
|
|
|
// if the value doesn't reference the second vector.
|
2006-04-05 01:25:31 +08:00
|
|
|
unsigned ElementBase = 0;
|
2006-03-20 14:37:44 +08:00
|
|
|
SDOperand Elt = N->getOperand(0);
|
2006-04-05 01:25:31 +08:00
|
|
|
if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
|
|
|
|
ElementBase = EltV->getValue();
|
|
|
|
else
|
|
|
|
return false; // FIXME: Handle UNDEF elements too!
|
|
|
|
|
|
|
|
if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check that they are consequtive.
|
|
|
|
for (unsigned i = 1; i != EltSize; ++i) {
|
|
|
|
if (!isa<ConstantSDNode>(N->getOperand(i)) ||
|
|
|
|
cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-03-20 14:37:44 +08:00
|
|
|
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
2006-04-05 01:25:31 +08:00
|
|
|
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
|
2006-03-20 14:37:44 +08:00
|
|
|
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
|
|
|
"Invalid VECTOR_SHUFFLE mask!");
|
2006-04-05 01:25:31 +08:00
|
|
|
for (unsigned j = 0; j != EltSize; ++j)
|
|
|
|
if (N->getOperand(i+j) != N->getOperand(j))
|
|
|
|
return false;
|
2006-03-20 14:37:44 +08:00
|
|
|
}
|
|
|
|
|
2006-04-05 01:25:31 +08:00
|
|
|
return true;
|
2006-03-20 14:33:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
|
|
|
|
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
|
2006-04-05 01:25:31 +08:00
|
|
|
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
|
|
|
|
assert(isSplatShuffleMask(N, EltSize));
|
|
|
|
return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
|
2006-03-20 14:33:01 +08:00
|
|
|
}
|
|
|
|
|
2006-03-25 14:12:06 +08:00
|
|
|
/// isVecSplatImm - Return true if this is a build_vector of constants which
|
|
|
|
/// can be formed by using a vspltis[bhw] instruction. The ByteSize field
|
|
|
|
/// indicates the number of bytes of each element [124] -> [bhw].
|
|
|
|
bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) {
|
|
|
|
SDOperand OpVal(0, 0);
|
|
|
|
// Check to see if this buildvec has a single non-undef value in its elements.
|
|
|
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
|
|
|
|
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
|
|
|
|
if (OpVal.Val == 0)
|
|
|
|
OpVal = N->getOperand(i);
|
|
|
|
else if (OpVal != N->getOperand(i))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (OpVal.Val == 0) return false; // All UNDEF: use implicit def.
|
|
|
|
|
2006-03-28 12:15:58 +08:00
|
|
|
unsigned ValSizeInBytes = 0;
|
|
|
|
uint64_t Value = 0;
|
2006-03-25 14:12:06 +08:00
|
|
|
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
|
|
|
|
Value = CN->getValue();
|
|
|
|
ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
|
|
|
|
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
|
|
|
|
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
|
|
|
|
Value = FloatToBits(CN->getValue());
|
|
|
|
ValSizeInBytes = 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the splat value is larger than the element value, then we can never do
|
|
|
|
// this splat. The only case that we could fit the replicated bits into our
|
|
|
|
// immediate field for would be zero, and we prefer to use vxor for it.
|
|
|
|
if (ValSizeInBytes < ByteSize) return false;
|
|
|
|
|
|
|
|
// If the element value is larger than the splat value, cut it in half and
|
|
|
|
// check to see if the two halves are equal. Continue doing this until we
|
|
|
|
// get to ByteSize. This allows us to handle 0x01010101 as 0x01.
|
|
|
|
while (ValSizeInBytes > ByteSize) {
|
|
|
|
ValSizeInBytes >>= 1;
|
|
|
|
|
|
|
|
// If the top half equals the bottom half, we're still ok.
|
2006-04-06 01:39:25 +08:00
|
|
|
if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
|
|
|
|
(Value & ((1 << (8*ValSizeInBytes))-1)))
|
2006-03-25 14:12:06 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Properly sign extend the value.
|
|
|
|
int ShAmt = (4-ByteSize)*8;
|
|
|
|
int MaskVal = ((int)Value << ShAmt) >> ShAmt;
|
|
|
|
|
2006-03-26 17:52:32 +08:00
|
|
|
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
|
2006-03-25 14:12:06 +08:00
|
|
|
if (MaskVal == 0) return false;
|
|
|
|
|
|
|
|
if (Val) *Val = MaskVal;
|
|
|
|
|
|
|
|
// Finally, if this value fits in a 5 bit sext field, return true.
|
|
|
|
return ((MaskVal << (32-5)) >> (32-5)) == MaskVal;
|
|
|
|
}
|
|
|
|
|
2006-03-20 14:33:01 +08:00
|
|
|
|
2005-08-26 08:52:45 +08:00
|
|
|
/// LowerOperation - Provide custom lowering hooks for some operations.
|
|
|
|
///
|
2005-10-16 13:39:50 +08:00
|
|
|
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
2005-08-26 08:52:45 +08:00
|
|
|
switch (Op.getOpcode()) {
|
|
|
|
default: assert(0 && "Wasn't expecting to be able to lower this!");
|
2005-09-01 05:09:52 +08:00
|
|
|
case ISD::FP_TO_SINT: {
|
2005-09-07 06:03:27 +08:00
|
|
|
assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
|
2005-10-02 14:37:13 +08:00
|
|
|
SDOperand Src = Op.getOperand(0);
|
|
|
|
if (Src.getValueType() == MVT::f32)
|
|
|
|
Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
|
|
|
|
|
2005-12-23 08:59:59 +08:00
|
|
|
SDOperand Tmp;
|
2005-09-07 06:03:27 +08:00
|
|
|
switch (Op.getValueType()) {
|
|
|
|
default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
|
|
|
|
case MVT::i32:
|
2005-12-23 08:59:59 +08:00
|
|
|
Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
|
2005-09-07 06:03:27 +08:00
|
|
|
break;
|
|
|
|
case MVT::i64:
|
2005-12-23 08:59:59 +08:00
|
|
|
Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
|
2005-09-07 06:03:27 +08:00
|
|
|
break;
|
|
|
|
}
|
2005-09-01 05:09:52 +08:00
|
|
|
|
2005-12-23 08:59:59 +08:00
|
|
|
// Convert the FP value to an int value through memory.
|
|
|
|
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
|
|
|
|
if (Op.getValueType() == MVT::i32)
|
|
|
|
Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
|
|
|
|
return Bits;
|
2005-09-07 06:03:27 +08:00
|
|
|
}
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
case ISD::SINT_TO_FP:
|
|
|
|
if (Op.getOperand(0).getValueType() == MVT::i64) {
|
|
|
|
SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
|
|
|
|
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
|
|
|
|
if (Op.getValueType() == MVT::f32)
|
|
|
|
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
|
|
|
return FP;
|
|
|
|
} else {
|
|
|
|
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
|
|
|
|
"Unhandled SINT_TO_FP type in custom expander!");
|
|
|
|
// Since we only generate this in 64-bit mode, we can take advantage of
|
|
|
|
// 64-bit registers. In particular, sign extend the input value into the
|
|
|
|
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
|
|
|
|
// then lfd it and fcfid it.
|
|
|
|
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
|
|
|
|
int FrameIdx = FrameInfo->CreateStackObject(8, 8);
|
|
|
|
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
|
|
|
|
|
|
|
|
SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
|
|
|
|
Op.getOperand(0));
|
|
|
|
|
|
|
|
// STD the extended value into the stack slot.
|
|
|
|
SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
|
|
|
|
DAG.getEntryNode(), Ext64, FIdx,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
// Load the value as a double.
|
|
|
|
SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
|
|
|
|
|
|
|
|
// FCFID it and return it.
|
|
|
|
SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
|
|
|
|
if (Op.getValueType() == MVT::f32)
|
|
|
|
FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
|
|
|
|
return FP;
|
|
|
|
}
|
2006-03-24 15:53:47 +08:00
|
|
|
break;
|
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01->2.18s
with llcbeta (16.7% and 38.1% respectively).
llvm-svn: 26943
2006-03-22 13:30:33 +08:00
|
|
|
|
2005-09-01 05:09:52 +08:00
|
|
|
case ISD::SELECT_CC: {
|
2005-08-26 08:52:45 +08:00
|
|
|
// Turn FP only select_cc's into fsel instructions.
|
2005-09-01 05:09:52 +08:00
|
|
|
if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
|
|
|
|
!MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
|
|
|
|
break;
|
|
|
|
|
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
|
|
|
|
|
|
|
|
// Cannot handle SETEQ/SETNE.
|
|
|
|
if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
|
|
|
|
|
|
|
|
MVT::ValueType ResVT = Op.getValueType();
|
|
|
|
MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
|
|
|
|
SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
|
|
|
|
SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
|
2005-08-26 08:52:45 +08:00
|
|
|
|
2005-09-01 05:09:52 +08:00
|
|
|
// If the RHS of the comparison is a 0.0, we don't need to do the
|
|
|
|
// subtraction at all.
|
|
|
|
if (isFloatingPointZero(RHS))
|
2005-08-26 08:52:45 +08:00
|
|
|
switch (CC) {
|
2006-01-19 03:42:35 +08:00
|
|
|
default: break; // SETUO etc aren't handled by fsel.
|
2005-08-26 08:52:45 +08:00
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETLT:
|
2005-09-01 05:09:52 +08:00
|
|
|
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
|
2005-08-26 08:52:45 +08:00
|
|
|
case ISD::SETUGE:
|
|
|
|
case ISD::SETGE:
|
2005-10-26 04:54:57 +08:00
|
|
|
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
|
2005-09-01 05:09:52 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
|
2005-08-26 08:52:45 +08:00
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETGT:
|
2005-09-01 05:09:52 +08:00
|
|
|
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
|
2005-08-26 08:52:45 +08:00
|
|
|
case ISD::SETULE:
|
|
|
|
case ISD::SETLE:
|
2005-10-26 04:54:57 +08:00
|
|
|
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
|
2005-08-27 04:25:03 +08:00
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT,
|
2005-10-27 02:01:11 +08:00
|
|
|
DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
|
2005-08-26 08:52:45 +08:00
|
|
|
}
|
2005-09-01 05:09:52 +08:00
|
|
|
|
2005-10-26 04:54:57 +08:00
|
|
|
SDOperand Cmp;
|
2005-09-01 05:09:52 +08:00
|
|
|
switch (CC) {
|
2006-01-19 03:42:35 +08:00
|
|
|
default: break; // SETUO etc aren't handled by fsel.
|
2005-09-01 05:09:52 +08:00
|
|
|
case ISD::SETULT:
|
|
|
|
case ISD::SETLT:
|
2005-10-26 04:54:57 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
|
2005-09-01 05:09:52 +08:00
|
|
|
case ISD::SETUGE:
|
|
|
|
case ISD::SETGE:
|
2005-10-26 04:54:57 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
|
2005-09-01 05:09:52 +08:00
|
|
|
case ISD::SETUGT:
|
|
|
|
case ISD::SETGT:
|
2005-10-26 04:54:57 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
|
2005-09-01 05:09:52 +08:00
|
|
|
case ISD::SETULE:
|
|
|
|
case ISD::SETLE:
|
2005-10-26 04:54:57 +08:00
|
|
|
Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
|
|
|
|
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
|
|
|
|
Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
|
|
|
|
return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
|
2005-08-26 08:52:45 +08:00
|
|
|
}
|
2005-09-01 05:09:52 +08:00
|
|
|
break;
|
|
|
|
}
|
2005-09-01 04:23:54 +08:00
|
|
|
case ISD::SHL: {
|
|
|
|
assert(Op.getValueType() == MVT::i64 &&
|
|
|
|
Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
|
|
|
|
// The generic code does a fine job expanding shift by a constant.
|
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) break;
|
|
|
|
|
|
|
|
// Otherwise, expand into a bunch of logical ops. Note that these ops
|
|
|
|
// depend on the PPC behavior for oversized shift amounts.
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(0, MVT::i32));
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Amt = Op.getOperand(1);
|
|
|
|
|
|
|
|
SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
|
|
|
|
DAG.getConstant(32, MVT::i32), Amt);
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
|
|
|
|
SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
|
2005-09-01 04:23:54 +08:00
|
|
|
SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
|
|
|
|
SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
|
|
|
|
DAG.getConstant(-32U, MVT::i32));
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
|
2005-09-01 04:23:54 +08:00
|
|
|
SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
|
2005-09-01 04:23:54 +08:00
|
|
|
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
|
|
|
|
}
|
|
|
|
case ISD::SRL: {
|
|
|
|
assert(Op.getValueType() == MVT::i64 &&
|
|
|
|
Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
|
|
|
|
// The generic code does a fine job expanding shift by a constant.
|
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) break;
|
|
|
|
|
|
|
|
// Otherwise, expand into a bunch of logical ops. Note that these ops
|
|
|
|
// depend on the PPC behavior for oversized shift amounts.
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(0, MVT::i32));
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Amt = Op.getOperand(1);
|
|
|
|
|
|
|
|
SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
|
|
|
|
DAG.getConstant(32, MVT::i32), Amt);
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
|
|
|
|
SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
|
2005-09-01 04:23:54 +08:00
|
|
|
SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
|
|
|
|
SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
|
|
|
|
DAG.getConstant(-32U, MVT::i32));
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
|
2005-09-01 04:23:54 +08:00
|
|
|
SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
|
2005-09-01 04:23:54 +08:00
|
|
|
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
|
|
|
|
}
|
|
|
|
case ISD::SRA: {
|
2005-09-01 03:09:57 +08:00
|
|
|
assert(Op.getValueType() == MVT::i64 &&
|
|
|
|
Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
|
|
|
|
// The generic code does a fine job expanding shift by a constant.
|
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) break;
|
|
|
|
|
|
|
|
// Otherwise, expand into a bunch of logical ops, followed by a select_cc.
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(0, MVT::i32));
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Amt = Op.getOperand(1);
|
|
|
|
|
|
|
|
SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
|
|
|
|
DAG.getConstant(32, MVT::i32), Amt);
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
|
|
|
|
SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
|
2005-09-01 03:09:57 +08:00
|
|
|
SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
|
|
|
|
SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
|
|
|
|
DAG.getConstant(-32U, MVT::i32));
|
2005-12-06 10:10:38 +08:00
|
|
|
SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
|
|
|
|
SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
|
2005-09-01 03:09:57 +08:00
|
|
|
SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
|
|
|
|
Tmp4, Tmp6, ISD::SETLE);
|
|
|
|
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
|
2005-08-26 08:52:45 +08:00
|
|
|
}
|
2005-12-10 10:36:00 +08:00
|
|
|
case ISD::ConstantPool: {
|
2006-02-01 06:23:14 +08:00
|
|
|
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
|
|
|
|
Constant *C = CP->get();
|
|
|
|
SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
|
2005-12-10 10:36:00 +08:00
|
|
|
SDOperand Zero = DAG.getConstant(0, MVT::i32);
|
|
|
|
|
2006-02-23 04:19:42 +08:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::Static) {
|
2005-12-10 10:36:00 +08:00
|
|
|
// Generate non-pic code that has direct accesses to the constant pool.
|
|
|
|
// The address of the global is just (hi(&g)+lo(&g)).
|
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
|
|
|
|
return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only lower ConstantPool on Darwin.
|
|
|
|
if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
|
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
|
2006-02-23 04:19:42 +08:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
|
2005-12-10 10:36:00 +08:00
|
|
|
// With PIC, the first instruction is actually "GR+hi(&G)".
|
|
|
|
Hi = DAG.getNode(ISD::ADD, MVT::i32,
|
|
|
|
DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
|
|
|
|
}
|
|
|
|
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
|
|
|
|
Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
return Lo;
|
|
|
|
}
|
2005-11-17 15:30:41 +08:00
|
|
|
case ISD::GlobalAddress: {
|
2005-12-24 09:00:15 +08:00
|
|
|
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
|
|
|
|
GlobalValue *GV = GSDN->getGlobal();
|
|
|
|
SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
|
2005-11-17 15:30:41 +08:00
|
|
|
SDOperand Zero = DAG.getConstant(0, MVT::i32);
|
2005-11-18 02:55:48 +08:00
|
|
|
|
2006-02-23 04:19:42 +08:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::Static) {
|
2005-12-10 10:36:00 +08:00
|
|
|
// Generate non-pic code that has direct accesses to globals.
|
|
|
|
// The address of the global is just (hi(&g)+lo(&g)).
|
2005-11-18 02:55:48 +08:00
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
|
|
|
|
return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
}
|
2005-11-17 15:30:41 +08:00
|
|
|
|
2005-11-18 02:55:48 +08:00
|
|
|
// Only lower GlobalAddress on Darwin.
|
|
|
|
if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
|
2006-01-06 09:04:03 +08:00
|
|
|
|
2005-11-17 15:30:41 +08:00
|
|
|
SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
|
2006-02-23 04:19:42 +08:00
|
|
|
if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
|
2005-11-17 15:30:41 +08:00
|
|
|
// With PIC, the first instruction is actually "GR+hi(&G)".
|
|
|
|
Hi = DAG.getNode(ISD::ADD, MVT::i32,
|
2005-11-18 01:51:38 +08:00
|
|
|
DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
|
2005-11-17 15:30:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
|
|
|
|
Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
|
|
|
|
|
2006-01-30 04:49:17 +08:00
|
|
|
if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
|
|
|
|
(!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
|
2005-11-17 15:30:41 +08:00
|
|
|
return Lo;
|
|
|
|
|
|
|
|
// If the global is weak or external, we have to go through the lazy
|
|
|
|
// resolution stub.
|
|
|
|
return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
|
|
|
|
}
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
llvm-svn: 25848
2006-01-31 16:17:29 +08:00
|
|
|
case ISD::SETCC: {
|
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
|
2006-02-01 15:19:44 +08:00
|
|
|
|
|
|
|
// If we're comparing for equality to zero, expose the fact that this is
|
|
|
|
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
|
|
|
|
// fold the new nodes.
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
|
|
|
if (C->isNullValue() && CC == ISD::SETEQ) {
|
|
|
|
MVT::ValueType VT = Op.getOperand(0).getValueType();
|
|
|
|
SDOperand Zext = Op.getOperand(0);
|
|
|
|
if (VT < MVT::i32) {
|
|
|
|
VT = MVT::i32;
|
|
|
|
Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
|
|
|
|
}
|
|
|
|
unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
|
|
|
|
SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
|
|
|
|
SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
|
|
|
|
DAG.getConstant(Log2b, getShiftAmountTy()));
|
|
|
|
return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc);
|
|
|
|
}
|
|
|
|
// Leave comparisons against 0 and -1 alone for now, since they're usually
|
|
|
|
// optimized. FIXME: revisit this when we can custom lower all setcc
|
|
|
|
// optimizations.
|
|
|
|
if (C->isAllOnesValue() || C->isNullValue())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have an integer seteq/setne, turn it into a compare against zero
|
|
|
|
// by subtracting the rhs from the lhs, which is faster than setting a
|
|
|
|
// condition register, reading it back out, and masking the correct bit.
|
|
|
|
MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
|
|
|
|
if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
|
|
|
MVT::ValueType VT = Op.getValueType();
|
|
|
|
SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
|
|
|
|
Op.getOperand(1));
|
|
|
|
return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
|
|
|
|
}
|
Codegen
bool %test(int %X) {
%Y = seteq int %X, 13
ret bool %Y
}
as
_test:
addi r2, r3, -13
cntlzw r2, r2
srwi r3, r2, 5
blr
rather than
_test:
cmpwi cr7, r3, 13
mfcr r2
rlwinm r3, r2, 31, 31, 31
blr
This has very little effect on most code, but speeds up analyzer 23% and
mason 11%
llvm-svn: 25848
2006-01-31 16:17:29 +08:00
|
|
|
break;
|
|
|
|
}
|
2006-01-26 02:21:52 +08:00
|
|
|
case ISD::VASTART: {
|
|
|
|
// vastart just stores the address of the VarArgsFrameIndex slot into the
|
|
|
|
// memory location argument.
|
|
|
|
// FIXME: Replace MVT::i32 with PointerTy
|
|
|
|
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
|
|
|
|
return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
|
|
|
|
Op.getOperand(1), Op.getOperand(2));
|
|
|
|
}
|
2006-01-28 05:09:22 +08:00
|
|
|
case ISD::RET: {
|
|
|
|
SDOperand Copy;
|
|
|
|
|
|
|
|
switch(Op.getNumOperands()) {
|
|
|
|
default:
|
|
|
|
assert(0 && "Do not know how to return this many arguments!");
|
|
|
|
abort();
|
|
|
|
case 1:
|
|
|
|
return SDOperand(); // ret void is legal
|
|
|
|
case 2: {
|
|
|
|
MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
|
|
|
|
unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1;
|
|
|
|
Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
|
|
|
|
SDOperand());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 3:
|
|
|
|
Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
|
|
|
|
SDOperand());
|
|
|
|
Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
|
|
|
|
}
|
2006-03-19 14:55:52 +08:00
|
|
|
case ISD::SCALAR_TO_VECTOR: {
|
|
|
|
// Create a stack slot that is 16-byte aligned.
|
|
|
|
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
|
|
|
|
int FrameIdx = FrameInfo->CreateStackObject(16, 16);
|
|
|
|
SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
|
|
|
|
|
|
|
|
// Store the input value into Value#0 of the stack slot.
|
|
|
|
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
|
|
|
|
Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
|
2006-03-28 09:43:22 +08:00
|
|
|
// Load it out.
|
|
|
|
return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
|
2006-03-19 14:55:52 +08:00
|
|
|
}
|
2006-03-24 15:48:08 +08:00
|
|
|
case ISD::BUILD_VECTOR:
|
|
|
|
// If this is a case we can't handle, return null and let the default
|
|
|
|
// expansion code take care of it. If we CAN select this case, return Op.
|
|
|
|
|
|
|
|
// See if this is all zeros.
|
|
|
|
// FIXME: We should handle splat(-0.0), and other cases here.
|
2006-03-26 17:52:32 +08:00
|
|
|
if (ISD::isBuildVectorAllZeros(Op.Val))
|
2006-03-24 15:48:08 +08:00
|
|
|
return Op;
|
2006-03-25 14:12:06 +08:00
|
|
|
|
|
|
|
if (PPC::isVecSplatImm(Op.Val, 1) || // vspltisb
|
|
|
|
PPC::isVecSplatImm(Op.Val, 2) || // vspltish
|
|
|
|
PPC::isVecSplatImm(Op.Val, 4)) // vspltisw
|
|
|
|
return Op;
|
|
|
|
|
2006-03-24 15:48:08 +08:00
|
|
|
return SDOperand();
|
|
|
|
|
2006-03-20 09:53:53 +08:00
|
|
|
case ISD::VECTOR_SHUFFLE: {
|
2006-03-20 14:51:10 +08:00
|
|
|
SDOperand V1 = Op.getOperand(0);
|
|
|
|
SDOperand V2 = Op.getOperand(1);
|
|
|
|
SDOperand PermMask = Op.getOperand(2);
|
|
|
|
|
|
|
|
// Cases that are handled by instructions that take permute immediates
|
|
|
|
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
|
|
|
|
// selected by the instruction selector.
|
2006-04-07 06:02:42 +08:00
|
|
|
if (V2.getOpcode() == ISD::UNDEF) {
|
|
|
|
if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
|
|
|
|
PPC::isSplatShuffleMask(PermMask.Val, 2) ||
|
|
|
|
PPC::isSplatShuffleMask(PermMask.Val, 4) ||
|
|
|
|
PPC::isVSLDOIRotateShuffleMask(PermMask.Val) != -1 ||
|
|
|
|
PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
|
|
|
|
PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
|
|
|
|
PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
|
|
|
|
PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
|
|
|
|
PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
|
|
|
|
PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
|
|
|
|
return Op;
|
|
|
|
}
|
|
|
|
}
|
2006-03-20 14:51:10 +08:00
|
|
|
|
2006-04-07 01:23:16 +08:00
|
|
|
if (PPC::isVPKUWUMShuffleMask(PermMask.Val) ||
|
2006-04-07 02:26:28 +08:00
|
|
|
PPC::isVPKUHUMShuffleMask(PermMask.Val) ||
|
|
|
|
PPC::isVSLDOIShuffleMask(PermMask.Val) != -1 ||
|
2006-04-07 06:02:42 +08:00
|
|
|
PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
|
|
|
|
PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
|
|
|
|
PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
|
|
|
|
PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
|
|
|
|
PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
|
|
|
|
PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
|
2006-04-07 01:23:16 +08:00
|
|
|
return Op;
|
|
|
|
|
2006-03-20 14:51:10 +08:00
|
|
|
// TODO: Handle more cases, and also handle cases that are cheaper to do as
|
|
|
|
// multiple such instructions than as a constant pool load/vperm pair.
|
2006-03-20 09:53:53 +08:00
|
|
|
|
|
|
|
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
|
|
|
|
// vector that will get spilled to the constant pool.
|
|
|
|
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
|
|
|
|
|
|
|
|
// The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
|
|
|
|
// that it is in input element units, not in bytes. Convert now.
|
|
|
|
MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
|
|
|
|
unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
|
|
|
|
|
|
|
|
std::vector<SDOperand> ResultMask;
|
|
|
|
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
|
|
|
|
unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
|
|
|
|
|
|
|
|
for (unsigned j = 0; j != BytesPerElement; ++j)
|
|
|
|
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
|
|
|
|
MVT::i8));
|
|
|
|
}
|
|
|
|
|
|
|
|
SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
|
|
|
|
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
|
|
|
|
}
|
2006-03-28 08:40:33 +08:00
|
|
|
case ISD::INTRINSIC_WO_CHAIN: {
|
2006-04-02 14:26:07 +08:00
|
|
|
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
|
2006-03-26 18:06:40 +08:00
|
|
|
|
|
|
|
// If this is a lowered altivec predicate compare, CompareOpc is set to the
|
|
|
|
// opcode number of the comparison.
|
|
|
|
int CompareOpc = -1;
|
2006-03-31 13:13:27 +08:00
|
|
|
bool isDot = false;
|
2006-03-26 18:06:40 +08:00
|
|
|
switch (IntNo) {
|
|
|
|
default: return SDOperand(); // Don't custom lower most intrinsics.
|
2006-03-31 13:13:27 +08:00
|
|
|
// Comparison predicates.
|
|
|
|
case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
|
|
|
|
|
|
|
|
// Normal Comparisons.
|
|
|
|
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
|
2006-03-26 18:06:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
assert(CompareOpc>0 && "We only lower altivec predicate compares so far!");
|
|
|
|
|
2006-03-31 13:13:27 +08:00
|
|
|
// If this is a non-dot comparison, make the VCMP node.
|
|
|
|
if (!isDot)
|
|
|
|
return DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
|
|
|
|
Op.getOperand(1), Op.getOperand(2),
|
|
|
|
DAG.getConstant(CompareOpc, MVT::i32));
|
|
|
|
|
2006-03-26 18:06:40 +08:00
|
|
|
// Create the PPCISD altivec 'dot' comparison node.
|
|
|
|
std::vector<SDOperand> Ops;
|
|
|
|
std::vector<MVT::ValueType> VTs;
|
|
|
|
Ops.push_back(Op.getOperand(2)); // LHS
|
|
|
|
Ops.push_back(Op.getOperand(3)); // RHS
|
|
|
|
Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
|
|
|
|
VTs.push_back(Op.getOperand(2).getValueType());
|
|
|
|
VTs.push_back(MVT::Flag);
|
|
|
|
SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
|
|
|
|
|
|
|
|
// Now that we have the comparison, emit a copy from the CR to a GPR.
|
|
|
|
// This is flagged to the above dot comparison.
|
|
|
|
SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
|
|
|
|
DAG.getRegister(PPC::CR6, MVT::i32),
|
|
|
|
CompNode.getValue(1));
|
|
|
|
|
|
|
|
// Unpack the result based on how the target uses it.
|
|
|
|
unsigned BitNo; // Bit # of CR6.
|
|
|
|
bool InvertBit; // Invert result?
|
|
|
|
switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
|
|
|
|
default: // Can't happen, don't crash on invalid number though.
|
|
|
|
case 0: // Return the value of the EQ bit of CR6.
|
|
|
|
BitNo = 0; InvertBit = false;
|
|
|
|
break;
|
|
|
|
case 1: // Return the inverted value of the EQ bit of CR6.
|
|
|
|
BitNo = 0; InvertBit = true;
|
|
|
|
break;
|
|
|
|
case 2: // Return the value of the LT bit of CR6.
|
|
|
|
BitNo = 2; InvertBit = false;
|
|
|
|
break;
|
|
|
|
case 3: // Return the inverted value of the LT bit of CR6.
|
|
|
|
BitNo = 2; InvertBit = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shift the bit into the low position.
|
|
|
|
Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
|
|
|
|
DAG.getConstant(8-(3-BitNo), MVT::i32));
|
|
|
|
// Isolate the bit.
|
|
|
|
Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
|
|
|
|
// If we are supposed to, toggle the bit.
|
|
|
|
if (InvertBit)
|
|
|
|
Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
|
|
|
|
DAG.getConstant(1, MVT::i32));
|
|
|
|
return Flags;
|
|
|
|
}
|
2005-09-01 04:23:54 +08:00
|
|
|
}
|
2005-08-26 08:52:45 +08:00
|
|
|
return SDOperand();
|
|
|
|
}
|
|
|
|
|
2005-08-17 01:14:42 +08:00
|
|
|
std::vector<SDOperand>
|
2005-10-16 13:39:50 +08:00
|
|
|
PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
|
2005-08-17 01:14:42 +08:00
|
|
|
//
|
|
|
|
// add beautiful description of PPC stack frame format, or at least some docs
|
|
|
|
//
|
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
|
|
|
MachineBasicBlock& BB = MF.front();
|
2005-09-14 03:33:40 +08:00
|
|
|
SSARegMap *RegMap = MF.getSSARegMap();
|
2005-08-17 01:14:42 +08:00
|
|
|
std::vector<SDOperand> ArgValues;
|
|
|
|
|
|
|
|
unsigned ArgOffset = 24;
|
|
|
|
unsigned GPR_remaining = 8;
|
|
|
|
unsigned FPR_remaining = 13;
|
|
|
|
unsigned GPR_idx = 0, FPR_idx = 0;
|
|
|
|
static const unsigned GPR[] = {
|
|
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
|
|
};
|
|
|
|
static const unsigned FPR[] = {
|
|
|
|
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
|
|
|
|
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
|
|
|
|
};
|
|
|
|
|
|
|
|
// Add DAG nodes to load the arguments... On entry to a function on PPC,
|
|
|
|
// the arguments start at offset 24, although they are likely to be passed
|
|
|
|
// in registers.
|
|
|
|
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
|
|
|
|
SDOperand newroot, argt;
|
|
|
|
unsigned ObjSize;
|
|
|
|
bool needsLoad = false;
|
|
|
|
bool ArgLive = !I->use_empty();
|
|
|
|
MVT::ValueType ObjectVT = getValueType(I->getType());
|
|
|
|
|
|
|
|
switch (ObjectVT) {
|
2005-08-30 08:19:00 +08:00
|
|
|
default: assert(0 && "Unhandled argument type!");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
ObjSize = 4;
|
|
|
|
if (!ArgLive) break;
|
|
|
|
if (GPR_remaining > 0) {
|
2005-10-18 08:28:58 +08:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-14 03:33:40 +08:00
|
|
|
MF.addLiveIn(GPR[GPR_idx], VReg);
|
|
|
|
argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
|
2005-08-31 09:58:39 +08:00
|
|
|
if (ObjectVT != MVT::i32) {
|
|
|
|
unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
|
|
|
|
: ISD::AssertZext;
|
|
|
|
argt = DAG.getNode(AssertOp, MVT::i32, argt,
|
|
|
|
DAG.getValueType(ObjectVT));
|
|
|
|
argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
|
|
|
|
}
|
2005-08-30 08:19:00 +08:00
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
2005-12-01 04:40:54 +08:00
|
|
|
case MVT::i64:
|
|
|
|
ObjSize = 8;
|
2005-08-30 08:19:00 +08:00
|
|
|
if (!ArgLive) break;
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
SDOperand argHi, argLo;
|
2005-10-18 08:28:58 +08:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-14 03:33:40 +08:00
|
|
|
MF.addLiveIn(GPR[GPR_idx], VReg);
|
|
|
|
argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
|
2005-08-30 08:19:00 +08:00
|
|
|
// If we have two or more remaining argument registers, then both halves
|
|
|
|
// of the i64 can be sourced from there. Otherwise, the lower half will
|
|
|
|
// have to come off the stack. This can happen when an i64 is preceded
|
|
|
|
// by 28 bytes of arguments.
|
|
|
|
if (GPR_remaining > 1) {
|
2005-10-18 08:28:58 +08:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-14 03:33:40 +08:00
|
|
|
MF.addLiveIn(GPR[GPR_idx+1], VReg);
|
|
|
|
argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
|
2005-08-30 08:19:00 +08:00
|
|
|
} else {
|
|
|
|
int FI = MFI->CreateFixedObject(4, ArgOffset+4);
|
|
|
|
SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
|
|
argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
}
|
|
|
|
// Build the outgoing arg thingy
|
|
|
|
argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
|
|
|
|
newroot = argLo;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
|
2006-01-12 02:21:25 +08:00
|
|
|
if (!ArgLive) {
|
|
|
|
if (FPR_remaining > 0) {
|
|
|
|
--FPR_remaining;
|
|
|
|
++FPR_idx;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2005-08-30 08:19:00 +08:00
|
|
|
if (FPR_remaining > 0) {
|
2005-10-01 09:35:02 +08:00
|
|
|
unsigned VReg;
|
|
|
|
if (ObjectVT == MVT::f32)
|
2005-10-18 08:28:58 +08:00
|
|
|
VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
|
2005-10-01 09:35:02 +08:00
|
|
|
else
|
2005-10-18 08:28:58 +08:00
|
|
|
VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
|
2005-09-14 03:33:40 +08:00
|
|
|
MF.addLiveIn(FPR[FPR_idx], VReg);
|
|
|
|
argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
|
2005-08-30 08:19:00 +08:00
|
|
|
--FPR_remaining;
|
|
|
|
++FPR_idx;
|
|
|
|
} else {
|
|
|
|
needsLoad = true;
|
|
|
|
}
|
|
|
|
break;
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// We need to load the argument to a virtual register if we determined above
|
|
|
|
// that we ran out of physical registers of the appropriate type
|
|
|
|
if (needsLoad) {
|
|
|
|
unsigned SubregOffset = 0;
|
|
|
|
if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
|
|
|
|
if (ObjectVT == MVT::i16) SubregOffset = 2;
|
|
|
|
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
|
|
|
|
SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
|
|
FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
|
|
|
|
DAG.getConstant(SubregOffset, MVT::i32));
|
|
|
|
argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Every 4 bytes of argument space consumes one of the GPRs available for
|
|
|
|
// argument passing.
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
|
|
|
|
GPR_remaining -= delta;
|
|
|
|
GPR_idx += delta;
|
|
|
|
}
|
|
|
|
ArgOffset += ObjSize;
|
|
|
|
if (newroot.Val)
|
|
|
|
DAG.setRoot(newroot.getValue(1));
|
|
|
|
|
|
|
|
ArgValues.push_back(argt);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the function takes variable number of arguments, make a frame index for
|
|
|
|
// the start of the first vararg value... for expansion of llvm.va_start.
|
|
|
|
if (F.isVarArg()) {
|
|
|
|
VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
|
|
|
|
SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
|
|
|
|
// If this function is vararg, store any remaining integer argument regs
|
|
|
|
// to their spots on the stack so that they may be loaded by deferencing the
|
|
|
|
// result of va_next.
|
|
|
|
std::vector<SDOperand> MemOps;
|
|
|
|
for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
|
2005-10-18 08:28:58 +08:00
|
|
|
unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
|
2005-09-14 03:33:40 +08:00
|
|
|
MF.addLiveIn(GPR[GPR_idx], VReg);
|
|
|
|
SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
|
2005-08-17 01:14:42 +08:00
|
|
|
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
|
|
|
|
Val, FIN, DAG.getSrcValue(NULL));
|
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Increment the address by four for the next argument to store
|
|
|
|
SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
|
|
|
|
FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
|
|
|
|
}
|
2005-12-01 04:40:54 +08:00
|
|
|
if (!MemOps.empty()) {
|
|
|
|
MemOps.push_back(DAG.getRoot());
|
|
|
|
DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
|
|
|
|
}
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Finally, inform the code generator which regs we return values in.
|
|
|
|
switch (getValueType(F.getReturnType())) {
|
|
|
|
default: assert(0 && "Unknown type!");
|
|
|
|
case MVT::isVoid: break;
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
MF.addLiveOut(PPC::R3);
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
MF.addLiveOut(PPC::R3);
|
|
|
|
MF.addLiveOut(PPC::R4);
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
MF.addLiveOut(PPC::F1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ArgValues;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::pair<SDOperand, SDOperand>
|
2005-10-16 13:39:50 +08:00
|
|
|
PPCTargetLowering::LowerCallTo(SDOperand Chain,
|
|
|
|
const Type *RetTy, bool isVarArg,
|
|
|
|
unsigned CallingConv, bool isTailCall,
|
|
|
|
SDOperand Callee, ArgListTy &Args,
|
|
|
|
SelectionDAG &DAG) {
|
2006-01-28 07:34:02 +08:00
|
|
|
// args_to_use will accumulate outgoing args for the PPCISD::CALL case in
|
2005-08-17 01:14:42 +08:00
|
|
|
// SelectExpr to use to put the arguments in the appropriate registers.
|
|
|
|
std::vector<SDOperand> args_to_use;
|
|
|
|
|
|
|
|
// Count how many bytes are to be pushed on the stack, including the linkage
|
|
|
|
// area, and parameter passing area.
|
|
|
|
unsigned NumBytes = 24;
|
|
|
|
|
|
|
|
if (Args.empty()) {
|
2006-02-13 16:55:29 +08:00
|
|
|
Chain = DAG.getCALLSEQ_START(Chain,
|
|
|
|
DAG.getConstant(NumBytes, getPointerTy()));
|
2005-08-17 01:14:42 +08:00
|
|
|
} else {
|
2005-08-30 08:19:00 +08:00
|
|
|
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
|
2005-08-17 01:14:42 +08:00
|
|
|
switch (getValueType(Args[i].second)) {
|
2005-08-30 08:19:00 +08:00
|
|
|
default: assert(0 && "Unknown value type!");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
case MVT::f32:
|
|
|
|
NumBytes += 4;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
case MVT::f64:
|
|
|
|
NumBytes += 8;
|
|
|
|
break;
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
2005-08-30 08:19:00 +08:00
|
|
|
}
|
2005-08-17 01:14:42 +08:00
|
|
|
|
2005-08-30 08:19:00 +08:00
|
|
|
// Just to be safe, we'll always reserve the full 24 bytes of linkage area
|
|
|
|
// plus 32 bytes of argument space in case any called code gets funky on us.
|
|
|
|
// (Required by ABI to support var arg)
|
|
|
|
if (NumBytes < 56) NumBytes = 56;
|
2005-08-17 01:14:42 +08:00
|
|
|
|
|
|
|
// Adjust the stack pointer for the new arguments...
|
|
|
|
// These operations are automatically eliminated by the prolog/epilog pass
|
2006-02-13 16:55:29 +08:00
|
|
|
Chain = DAG.getCALLSEQ_START(Chain,
|
|
|
|
DAG.getConstant(NumBytes, getPointerTy()));
|
2005-08-17 01:14:42 +08:00
|
|
|
|
|
|
|
// Set up a copy of the stack pointer for use loading and storing any
|
|
|
|
// arguments that may not fit in the registers available for argument
|
|
|
|
// passing.
|
Fix calls that need to store values in stack slots, to not copy the stack
pointer. This allows us to emit stuff like this:
li r10, 0
stw r10, 56(r1)
or r3, r10, r10
or r4, r10, r10
or r5, r10, r10
or r6, r10, r10
or r7, r10, r10
or r8, r10, r10
or r9, r10, r10
bl L_bar$stub
instead of this:
or r2, r1, r1 ;; Extraneous copy.
li r10, 0
stw r10, 56(r2)
or r3, r10, r10
or r4, r10, r10
or r5, r10, r10
or r6, r10, r10
or r7, r10, r10
or r8, r10, r10
or r9, r10, r10
bl L_bar$stub
wowness.
llvm-svn: 25221
2006-01-12 03:55:07 +08:00
|
|
|
SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
|
2005-08-17 01:14:42 +08:00
|
|
|
|
|
|
|
// Figure out which arguments are going to go in registers, and which in
|
|
|
|
// memory. Also, if this is a vararg function, floating point operations
|
|
|
|
// must be stored to our stack, and loaded into integer regs as well, if
|
|
|
|
// any integer regs are available for argument passing.
|
|
|
|
unsigned ArgOffset = 24;
|
|
|
|
unsigned GPR_remaining = 8;
|
|
|
|
unsigned FPR_remaining = 13;
|
|
|
|
|
|
|
|
std::vector<SDOperand> MemOps;
|
|
|
|
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
|
|
|
|
// PtrOff will be used to store the current argument to the stack if a
|
|
|
|
// register cannot be found for it.
|
|
|
|
SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
|
|
|
|
MVT::ValueType ArgVT = getValueType(Args[i].second);
|
|
|
|
|
|
|
|
switch (ArgVT) {
|
2005-08-30 08:19:00 +08:00
|
|
|
default: assert(0 && "Unexpected ValueType for argument!");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
// Promote the integer to 32 bits. If the input type is signed use a
|
|
|
|
// sign extend, otherwise use a zero extend.
|
|
|
|
if (Args[i].second->isSigned())
|
|
|
|
Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
|
|
|
|
else
|
|
|
|
Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
|
|
|
|
// FALL THROUGH
|
|
|
|
case MVT::i32:
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
args_to_use.push_back(Args[i].first);
|
|
|
|
--GPR_remaining;
|
|
|
|
} else {
|
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL)));
|
|
|
|
}
|
|
|
|
ArgOffset += 4;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
// If we have one free GPR left, we can place the upper half of the i64
|
|
|
|
// in it, and store the other half to the stack. If we have two or more
|
|
|
|
// free GPRs, then we can pass both halves of the i64 in registers.
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
|
|
|
|
Args[i].first, DAG.getConstant(1, MVT::i32));
|
|
|
|
SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
|
|
|
|
Args[i].first, DAG.getConstant(0, MVT::i32));
|
|
|
|
args_to_use.push_back(Hi);
|
|
|
|
--GPR_remaining;
|
2005-08-17 01:14:42 +08:00
|
|
|
if (GPR_remaining > 0) {
|
2005-08-30 08:19:00 +08:00
|
|
|
args_to_use.push_back(Lo);
|
2005-08-17 01:14:42 +08:00
|
|
|
--GPR_remaining;
|
|
|
|
} else {
|
2005-08-30 08:19:00 +08:00
|
|
|
SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
|
2005-08-17 01:14:42 +08:00
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
2005-08-30 08:19:00 +08:00
|
|
|
Lo, PtrOff, DAG.getSrcValue(NULL)));
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
2005-08-30 08:19:00 +08:00
|
|
|
} else {
|
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL)));
|
|
|
|
}
|
|
|
|
ArgOffset += 8;
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
if (FPR_remaining > 0) {
|
|
|
|
args_to_use.push_back(Args[i].first);
|
|
|
|
--FPR_remaining;
|
|
|
|
if (isVarArg) {
|
|
|
|
SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL));
|
|
|
|
MemOps.push_back(Store);
|
|
|
|
// Float varargs are always shadowed in available integer registers
|
2005-08-17 01:14:42 +08:00
|
|
|
if (GPR_remaining > 0) {
|
2005-08-30 08:19:00 +08:00
|
|
|
SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL));
|
2005-11-18 02:30:17 +08:00
|
|
|
MemOps.push_back(Load.getValue(1));
|
2005-08-30 08:19:00 +08:00
|
|
|
args_to_use.push_back(Load);
|
2005-08-17 01:14:42 +08:00
|
|
|
--GPR_remaining;
|
2005-08-30 08:19:00 +08:00
|
|
|
}
|
|
|
|
if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
|
2005-08-17 01:14:42 +08:00
|
|
|
SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
|
|
|
|
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
|
2005-08-30 08:19:00 +08:00
|
|
|
SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL));
|
2005-11-18 02:30:17 +08:00
|
|
|
MemOps.push_back(Load.getValue(1));
|
2005-08-30 08:19:00 +08:00
|
|
|
args_to_use.push_back(Load);
|
|
|
|
--GPR_remaining;
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
|
|
|
} else {
|
2005-08-30 08:19:00 +08:00
|
|
|
// If we have any FPRs remaining, we may also have GPRs remaining.
|
|
|
|
// Args passed in FPRs consume either 1 (f32) or 2 (f64) available
|
|
|
|
// GPRs.
|
|
|
|
if (GPR_remaining > 0) {
|
|
|
|
args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
|
|
|
|
--GPR_remaining;
|
|
|
|
}
|
|
|
|
if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
|
|
|
|
args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
|
|
|
|
--GPR_remaining;
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
|
|
|
}
|
2005-08-30 08:19:00 +08:00
|
|
|
} else {
|
|
|
|
MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
|
|
|
|
Args[i].first, PtrOff,
|
|
|
|
DAG.getSrcValue(NULL)));
|
|
|
|
}
|
|
|
|
ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
|
|
|
|
break;
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!MemOps.empty())
|
|
|
|
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<MVT::ValueType> RetVals;
|
|
|
|
MVT::ValueType RetTyVT = getValueType(RetTy);
|
2005-09-02 09:24:55 +08:00
|
|
|
MVT::ValueType ActualRetTyVT = RetTyVT;
|
|
|
|
if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
|
|
|
|
ActualRetTyVT = MVT::i32; // Promote result to i32.
|
|
|
|
|
2006-01-28 15:33:03 +08:00
|
|
|
if (RetTyVT == MVT::i64) {
|
|
|
|
RetVals.push_back(MVT::i32);
|
|
|
|
RetVals.push_back(MVT::i32);
|
|
|
|
} else if (RetTyVT != MVT::isVoid) {
|
2005-09-02 09:24:55 +08:00
|
|
|
RetVals.push_back(ActualRetTyVT);
|
2006-01-28 15:33:03 +08:00
|
|
|
}
|
2005-08-17 01:14:42 +08:00
|
|
|
RetVals.push_back(MVT::Other);
|
|
|
|
|
2005-11-17 13:56:14 +08:00
|
|
|
// If the callee is a GlobalAddress node (quite common, every direct call is)
|
|
|
|
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
|
|
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
|
|
|
|
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
|
|
|
|
|
2006-01-28 07:34:02 +08:00
|
|
|
std::vector<SDOperand> Ops;
|
|
|
|
Ops.push_back(Chain);
|
|
|
|
Ops.push_back(Callee);
|
|
|
|
Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
|
|
|
|
SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
|
2006-01-28 15:33:03 +08:00
|
|
|
Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
|
2005-08-17 01:14:42 +08:00
|
|
|
Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
|
|
|
|
DAG.getConstant(NumBytes, getPointerTy()));
|
2005-09-02 09:24:55 +08:00
|
|
|
SDOperand RetVal = TheCall;
|
|
|
|
|
|
|
|
// If the result is a small value, add a note so that we keep track of the
|
|
|
|
// information about whether it is sign or zero extended.
|
|
|
|
if (RetTyVT != ActualRetTyVT) {
|
|
|
|
RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
|
|
|
|
MVT::i32, RetVal, DAG.getValueType(RetTyVT));
|
|
|
|
RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
|
2006-01-28 15:33:03 +08:00
|
|
|
} else if (RetTyVT == MVT::i64) {
|
|
|
|
RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
|
2005-09-02 09:24:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(RetVal, Chain);
|
2005-08-17 01:14:42 +08:00
|
|
|
}
|
|
|
|
|
2005-08-27 05:23:58 +08:00
|
|
|
/// InsertAtEndOfBasicBlock - Expand a SELECT_CC_Int / SELECT_CC_F4 /
/// SELECT_CC_F8 pseudo instruction into a diamond control-flow pattern ending
/// in a PHI.  The pseudo instruction is deleted and the block that holds the
/// PHI is returned.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8) &&
         "Unexpected instr type to insert");

  // The pseudo instruction carries everything needed to build the diamond:
  // the destination vreg (operand 0), the condition register to branch on
  // (operand 1), the two values to select between (operands 2 and 3) and the
  // branch opcode to use (operand 4).
  const BasicBlock *IRBlock = BB->getBasicBlock();

  // New blocks are inserted right after the current one.
  ilist<MachineBasicBlock>::iterator InsertPos = BB;
  ++InsertPos;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(IRBlock);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(IRBlock);
  BuildMI(thisMBB, MI->getOperand(4).getImmedValue(), 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);

  MachineFunction *MF = thisMBB->getParent();
  MF->getBasicBlockList().insert(InsertPos, copy0MBB);
  MF->getBasicBlockList().insert(InsertPos, sinkMBB);

  // Machine-CFG update, step 1: the block that will hold the PHI inherits
  // every successor of the current block.
  for (MachineBasicBlock::succ_iterator SI = thisMBB->succ_begin(),
       SE = thisMBB->succ_end(); SI != SE; ++SI)
    sinkMBB->addSuccessor(*SI);

  // Step 2: the current block drops all of its old successors and instead
  // leads to the fallthrough block and the PHI block.
  while (!thisMBB->succ_empty())
    thisMBB->removeSuccessor(thisMBB->succ_begin());
  thisMBB->addSuccessor(copy0MBB);
  thisMBB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  copy0MBB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BuildMI(sinkMBB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return sinkMBB;
}
|
|
|
|
|
2006-03-01 12:57:39 +08:00
|
|
|
/// PerformDAGCombine - Target-specific DAG combines.  Returns the replacement
/// value for N, or a null SDOperand when no combine applies.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default: break;

  case ISD::SINT_TO_FP: {
    // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
    // Only attempted on 64-bit subtargets.
    if (!TM.getSubtarget<PPCSubtarget>().is64Bit())
      break;
    if (N->getOperand(0).getOpcode() != ISD::FP_TO_SINT)
      break;

    // The source and destination may each be f32 or f64, but the
    // intermediate integer type must be i64.  (An i32 intermediate could
    // avoid the load/store as well; that case is not handled here.)
    if (N->getOperand(0).getValueType() != MVT::i64)
      break;

    SDOperand FPVal = N->getOperand(0).getOperand(0);
    if (FPVal.getValueType() == MVT::f32) {
      FPVal = DAG.getNode(ISD::FP_EXTEND, MVT::f64, FPVal);
      DCI.AddToWorklist(FPVal.Val);
    }

    FPVal = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, FPVal);
    DCI.AddToWorklist(FPVal.Val);
    FPVal = DAG.getNode(PPCISD::FCFID, MVT::f64, FPVal);
    DCI.AddToWorklist(FPVal.Val);

    // Round back down when the node's result type is f32.
    if (N->getValueType(0) == MVT::f32) {
      FPVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, FPVal);
      DCI.AddToWorklist(FPVal.Val);
    }
    return FPVal;
  }

  case ISD::STORE: {
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    const bool CanFold =
      TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
      N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
      N->getOperand(1).getValueType() == MVT::i32;
    if (!CanFold)
      break;

    SDOperand FPVal = N->getOperand(1).getOperand(0);
    if (FPVal.getValueType() == MVT::f32) {
      FPVal = DAG.getNode(ISD::FP_EXTEND, MVT::f64, FPVal);
      DCI.AddToWorklist(FPVal.Val);
    }
    FPVal = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, FPVal);
    DCI.AddToWorklist(FPVal.Val);

    // Reuse the original store's chain (operand 0) and its operands 2 and 3.
    SDOperand NewStore =
      DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), FPVal,
                  N->getOperand(2), N->getOperand(3));
    DCI.AddToWorklist(NewStore.Val);
    return NewStore;
  }

  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6
    // and a normal output).  Such a node can only exist when none of our
    // operands has a single use.
    if (N->getOperand(0).hasOneUse() ||
        N->getOperand(1).hasOneUse() ||
        N->getOperand(2).hasOneUse())
      break;

    // Scan all of the users of the LHS, looking for a VCMPo that matches.
    SDNode *MatchingVCMPo = 0;
    SDNode *LHSNode = N->getOperand(0).Val;
    for (SDNode::use_iterator UI = LHSNode->use_begin(),
         UE = LHSNode->use_end(); UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User->getOpcode() != PPCISD::VCMPo)
        continue;
      if (User->getOperand(1) == N->getOperand(1) &&
          User->getOperand(2) == N->getOperand(2) &&
          User->getOperand(0) == N->getOperand(0)) {
        MatchingVCMPo = User;
        break;
      }
    }

    // If there are non-zero uses of the flag value, use the VCMPo node!
    if (MatchingVCMPo && !MatchingVCMPo->hasNUsesOfValue(0, 1))
      return SDOperand(MatchingVCMPo, 0);
    break;
  }
  }

  return SDOperand();
}
|
|
|
|
|
2006-04-02 14:26:07 +08:00
|
|
|
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
|
|
|
|
uint64_t Mask,
|
|
|
|
uint64_t &KnownZero,
|
|
|
|
uint64_t &KnownOne,
|
|
|
|
unsigned Depth) const {
|
|
|
|
KnownZero = 0;
|
|
|
|
KnownOne = 0;
|
|
|
|
switch (Op.getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case ISD::INTRINSIC_WO_CHAIN: {
|
|
|
|
switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
|
|
|
|
default: break;
|
|
|
|
case Intrinsic::ppc_altivec_vcmpbfp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpeqfp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequb_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequh_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpequw_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgefp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtfp_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsb_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsh_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtsw_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtub_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuh_p:
|
|
|
|
case Intrinsic::ppc_altivec_vcmpgtuw_p:
|
|
|
|
KnownZero = ~1U; // All bits but the low one are known to be zero.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-02-08 04:16:30 +08:00
|
|
|
/// getConstraintType - Given a constraint letter, return the type of
|
|
|
|
/// constraint it is for this target.
|
|
|
|
PPCTargetLowering::ConstraintType
|
|
|
|
PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
|
|
|
|
switch (ConstraintLetter) {
|
|
|
|
default: break;
|
|
|
|
case 'b':
|
|
|
|
case 'r':
|
|
|
|
case 'f':
|
|
|
|
case 'v':
|
|
|
|
case 'y':
|
|
|
|
return C_RegisterClass;
|
|
|
|
}
|
|
|
|
return TargetLowering::getConstraintType(ConstraintLetter);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-02-01 03:20:21 +08:00
|
|
|
std::vector<unsigned> PPCTargetLowering::
|
2006-02-22 08:56:39 +08:00
|
|
|
getRegClassForInlineAsmConstraint(const std::string &Constraint,
|
|
|
|
MVT::ValueType VT) const {
|
2006-02-01 03:20:21 +08:00
|
|
|
if (Constraint.size() == 1) {
|
|
|
|
switch (Constraint[0]) { // GCC RS6000 Constraint Letters
|
|
|
|
default: break; // Unknown constriant letter
|
|
|
|
case 'b':
|
|
|
|
return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
|
|
|
|
PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
|
|
|
|
PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
|
|
|
|
PPC::R12, PPC::R13, PPC::R14, PPC::R15,
|
|
|
|
PPC::R16, PPC::R17, PPC::R18, PPC::R19,
|
|
|
|
PPC::R20, PPC::R21, PPC::R22, PPC::R23,
|
|
|
|
PPC::R24, PPC::R25, PPC::R26, PPC::R27,
|
|
|
|
PPC::R28, PPC::R29, PPC::R30, PPC::R31,
|
|
|
|
0);
|
|
|
|
case 'r':
|
|
|
|
return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
|
|
|
|
PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
|
|
|
|
PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
|
|
|
|
PPC::R12, PPC::R13, PPC::R14, PPC::R15,
|
|
|
|
PPC::R16, PPC::R17, PPC::R18, PPC::R19,
|
|
|
|
PPC::R20, PPC::R21, PPC::R22, PPC::R23,
|
|
|
|
PPC::R24, PPC::R25, PPC::R26, PPC::R27,
|
|
|
|
PPC::R28, PPC::R29, PPC::R30, PPC::R31,
|
|
|
|
0);
|
|
|
|
case 'f':
|
|
|
|
return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
|
|
|
|
PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
|
|
|
|
PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
|
|
|
|
PPC::F12, PPC::F13, PPC::F14, PPC::F15,
|
|
|
|
PPC::F16, PPC::F17, PPC::F18, PPC::F19,
|
|
|
|
PPC::F20, PPC::F21, PPC::F22, PPC::F23,
|
|
|
|
PPC::F24, PPC::F25, PPC::F26, PPC::F27,
|
|
|
|
PPC::F28, PPC::F29, PPC::F30, PPC::F31,
|
|
|
|
0);
|
|
|
|
case 'v':
|
|
|
|
return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
|
|
|
|
PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
|
|
|
|
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
|
|
|
|
PPC::V12, PPC::V13, PPC::V14, PPC::V15,
|
|
|
|
PPC::V16, PPC::V17, PPC::V18, PPC::V19,
|
|
|
|
PPC::V20, PPC::V21, PPC::V22, PPC::V23,
|
|
|
|
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
|
|
|
|
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
|
|
|
|
0);
|
|
|
|
case 'y':
|
|
|
|
return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
|
|
|
|
PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
|
|
|
|
0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-02-22 08:56:39 +08:00
|
|
|
return std::vector<unsigned>();
|
2006-02-01 03:20:21 +08:00
|
|
|
}
|
2006-02-07 08:47:13 +08:00
|
|
|
|
|
|
|
// isOperandValidForConstraint
|
|
|
|
bool PPCTargetLowering::
|
|
|
|
isOperandValidForConstraint(SDOperand Op, char Letter) {
|
|
|
|
switch (Letter) {
|
|
|
|
default: break;
|
|
|
|
case 'I':
|
|
|
|
case 'J':
|
|
|
|
case 'K':
|
|
|
|
case 'L':
|
|
|
|
case 'M':
|
|
|
|
case 'N':
|
|
|
|
case 'O':
|
|
|
|
case 'P': {
|
|
|
|
if (!isa<ConstantSDNode>(Op)) return false; // Must be an immediate.
|
|
|
|
unsigned Value = cast<ConstantSDNode>(Op)->getValue();
|
|
|
|
switch (Letter) {
|
|
|
|
default: assert(0 && "Unknown constraint letter!");
|
|
|
|
case 'I': // "I" is a signed 16-bit constant.
|
|
|
|
return (short)Value == (int)Value;
|
|
|
|
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
|
|
|
|
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
|
|
|
|
return (short)Value == 0;
|
|
|
|
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
|
|
|
|
return (Value >> 16) == 0;
|
|
|
|
case 'M': // "M" is a constant that is greater than 31.
|
|
|
|
return Value > 31;
|
|
|
|
case 'N': // "N" is a positive constant that is an exact power of two.
|
|
|
|
return (int)Value > 0 && isPowerOf2_32(Value);
|
|
|
|
case 'O': // "O" is the constant zero.
|
|
|
|
return Value == 0;
|
|
|
|
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
|
|
|
|
return (short)-Value == (int)-Value;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle standard constraint letters.
|
|
|
|
return TargetLowering::isOperandValidForConstraint(Op, Letter);
|
|
|
|
}
|
2006-03-14 07:20:37 +08:00
|
|
|
|
|
|
|
/// isLegalAddressImmediate - Return true if the integer value can be used
|
|
|
|
/// as the offset of the target addressing mode.
|
|
|
|
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
|
|
|
|
// PPC allows a sign-extended 16-bit immediate field.
|
|
|
|
return (V > -(1 << 16) && V < (1 << 16)-1);
|
|
|
|
}
|