//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define ISDBITCAST ISD::BITCAST
#define MVTGLUE MVT::Glue

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
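// getConversionNode - Convert Src so that its scalar type matches Dst's
// scalar type. Two FP or two integer types are extended, rounded, or
// truncated as needed; across the int/FP boundary, asType selects between a
// bit-preserving cast (ISD::BITCAST) and a signed value conversion.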
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (svt.bitsGT(dvt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32 and 64bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32 and 64bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
// condition.
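// The result encodes both the comparison and the operand class: IL_CC_I_*
// and IL_CC_U_* are signed/unsigned compares on i32 and smaller, IL_CC_L_*
// and IL_CC_UL_* are the i64 equivalents, and IL_CC_F_*/IL_CC_D_* operate
// on f32 and f64 respectively.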
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
    default:
      {
        errs() << "Condition Code: " << (unsigned int)CC << "\n";
        assert(0 && "Unknown condition code!");
      }
    case ISD::SETO:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_O;
        case MVT::f64:
          return AMDILCC::IL_CC_D_O;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUO:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_UO;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UO;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_NE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_NE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_EQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_EQ;
        case MVT::i64:
          return AMDILCC::IL_CC_L_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UNE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UNE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UEQ;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETONE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_ONE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ONE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOEQ:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OEQ;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
  };
}

SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  bool AlwaysUseMutable = (CallConv == CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                  VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                     MachinePointerInfo::getFixedStack(FI),
                     false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are supported.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg, so expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU does not have a divrem instruction for signed or unsigned
    // operands.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU does not have a [S|U]MUL_LOHI instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU does not have rotl, rotr, or byteswap instructions.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU does not have any bit-counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64, but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these types;
  // it needs vector comparisons to handle them correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
}

const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
    case AMDILISD::MAD: return "AMDILISD::MAD";
    case AMDILISD::CALL: return "AMDILISD::CALL";
    case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
  };
}
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}

// The backend supports 32 and 64 bit floating point immediates.
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64;
}

bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy != MVT::f32
      && VT.getScalarType().getSimpleVT().SimpleTy != MVT::f64;
}

// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
  switch (Op.getOpcode()) {
    default: break;
    case AMDILISD::SELECT_CC:
      DAG.ComputeMaskedBits(
          Op.getOperand(1),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      DAG.ComputeMaskedBits(
          Op.getOperand(0),
          KnownZero2,
          KnownOne2
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS.
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  };
}

// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered. This returns an SDNode with the same number of values as the
// ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes.
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
static const SDValue
Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
{
  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
    DAG.AssignOrdering( New.getNode(), order );
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered( DAG, order, New.getOperand(i) );
  }
  return New;
}

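// The LOWER(A) macro expands to a case that dispatches ISD::A to the
// matching Lower##A hook and propagates the original node's ordering onto
// the newly created nodes through Ordered().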
#define LOWER(A) \
  case ISD:: A: \
  return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this "
          "instruction is not implemented yet!");
      break;
    LOWER(GlobalAddress);
    LOWER(JumpTable);
    LOWER(ConstantPool);
    LOWER(ExternalSymbol);
    LOWER(SDIV);
    LOWER(SREM);
    LOWER(BUILD_VECTOR);
    LOWER(SELECT);
    LOWER(SETCC);
    LOWER(SIGN_EXTEND_INREG);
    LOWER(DYNAMIC_STACKALLOC);
    LOWER(BRCOND);
    LOWER(BR_CC);
  }
  return Op;
}

#undef LOWER

SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = Op;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
  } else {
    if (GV->hasInitializer()) {
      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
            Op.getValueType());
      } else if (dyn_cast<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
        } else {
          DST = DAG.getConstantFP(0, VT);
        }
      } else {
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
        C->dump();
        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
      }
    } else {
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    }
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  return Result;
}
SDValue
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  SDValue Result;
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  } else {
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  }
  return Result;
}

SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}

/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
  const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here.
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers.
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement.
      }
    } else if(VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register nor a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
    // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes.
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
                       Src.getDebugLoc(),
                       Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*IsVol=*/false, /*AlwaysInline=*/true,
                       MachinePointerInfo(), MachinePointerInfo());
}

SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - Function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), and CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
    SmallVectorImpl<SDValue> &InVals) const

#if 0
    SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
#endif
{
  CLI.IsTailCall = false;
  MachineFunction& MF = CLI.DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations? See X86/PPC ISelLowering.
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *CLI.DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we have more than one calling convention.
  CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (CLI.IsTailCall) {
    assert(CLI.IsTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering.
  }

  CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain,
      CLI.DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL.
    SDValue Arg = CLI.OutVals[i];
    // Promote the value if needed.
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND,
            CLI.DL,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND,
            CLI.DL,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = CLI.DAG.getNode(ISD::ANY_EXTEND,
            CLI.DL,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = CLI.DAG.getFrameIndex(FI,getPointerTy());

      // Emit an ISD::STORE which stores the
      // parameter value to a stack location.
      MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  if (!MemOpChains.empty()) {
    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor,
        CLI.DL,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!CLI.IsTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain,
          CLI.DL,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = CLI.Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
    CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
    CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // See X86 ISelLowering for ideas on implementation: 1708.
  }

  SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // See X86 ISelLowering for ideas on implementation: 1721.
  }
  // If this is a direct call, pass the chain and the callee.
  if (CLI.Callee.getNode()) {
    Ops.push_back(CLI.Chain);
    Ops.push_back(CLI.Callee);
  }

  if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // See X86 ISelLowering for ideas on implementation: 1739.
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(CLI.DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit tail call.
  if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // See X86 ISelLowering for ideas on implementation: 1762.
  }

  CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL,
      CLI.DL,
      NodeTys, &Ops[0], Ops.size());
  InFlag = CLI.Chain.getValue(1);

  // Create the CALLSEQ_END node.
  CLI.Chain = CLI.DAG.getCALLSEQ_END(
      CLI.Chain,
      CLI.DAG.getIntPtrConstant(NumBytes, true),
      CLI.DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = CLI.Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return.
  return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg,
                         CLI.Ins, CLI.DL, CLI.DAG, InVals);
}

SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

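// LowerBUILD_VECTOR first splats operand 0 into all lanes with
// AMDILISD::VBUILD, then patches the remaining lanes with INSERT_VECTOR_ELT
// wherever the corresponding operand is defined. The switch cases below fall
// through intentionally, so a four-operand build inserts operands 3, 2, and
// then 1.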
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}

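// SELECT is lowered to AMDILISD::CMOVLOG once the condition has been
// converted (via getConversionNode) to the result's type, so the conditional
// move operates on matching bit widths.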
SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}
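// LowerSETCC materializes the comparison as a SELECT_CC producing -1 or 0,
// converts that mask to the result type, and ANDs it with 1 to yield the
// 0/1 value ISD::SETCC is defined to return.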
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

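// Sign extension in-register uses the classic shift pair: shifting left by
// (width - baseBits) and arithmetic-shifting right by the same amount
// replicates the sign bit of the narrow value. Sub-32-bit sources are first
// zero-extended to 32 bits so the shift amounts are well defined.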
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
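// genIntType - Return an i32- or i64-based EVT (scalar or vector) whose
// total width covers size * numEle bits; used to pick a legal integer
// container type.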
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

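// Dynamic allocas are lowered by bumping the software stack pointer held in
// AMDGPU::SP: copy SP out, add the allocation size, copy the new value back,
// and return the new SP along with the updated chain.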
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDGPU::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
  return Chain;
}
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerReturn - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
  const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET.
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented.
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // Both x86 and ppc implement this in ISelLowering.

  // Regular return here.
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 indexes only the regnums.
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}

unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

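// LowerSDIV24 implements signed division for i8/i16 (values that fit in 24
// bits) via f32 arithmetic: compute q = trunc(fa / fb), estimate the
// remainder with r = mad(-q, fb, fa), and, when |r| >= |fb|, correct the
// truncated quotient by sign(LHS ^ RHS), the jq value built below.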
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

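  // The ilt/iadd/ixor prologue strips the signs: with s = (x < 0 ? -1 : 0),
  // (x + s) ^ s yields |x|. After the unsigned divide, the epilogue applies
  // the quotient's sign, r10 ^ r11, using the same add/xor pattern.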
  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

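// LowerSREM8 and LowerSREM16 promote their operands to the matching 32-bit
// scalar or vector type, take the remainder with an i32 SREM, and truncate
// the result back to the original narrow type.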
SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

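  // As in LowerSDIV32, the add/xor pairs strip and restore the signs; the
  // udiv/umul/sub triple computes the unsigned remainder r0 - (r0 / r1) * r1,
  // which then takes the sign of the dividend (r10).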
  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}