forked from OSchip/llvm-project
Revert "AMDGPU: Add core backend files for R600/SI codegen v6"
This reverts commit 4ea70107c5e51230e9e60f0bf58a0f74aa4885ea. llvm-svn: 160303
This commit is contained in:
parent
adf452260f
commit
1be1aa84ec
|
@ -1,35 +0,0 @@
|
|||
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_H
|
||||
#define AMDGPU_H
|
||||
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class FunctionPass;
|
||||
class AMDGPUTargetMachine;
|
||||
|
||||
// R600 Passes
|
||||
FunctionPass* createR600KernelParametersPass(const TargetData* TD);
|
||||
FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
|
||||
// Passes common to R600 and SI
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPU_H
|
|
@ -1,21 +0,0 @@
|
|||
//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
|
||||
// Include AMDIL TD files
|
||||
include "AMDILBase.td"
|
||||
include "AMDILVersion.td"
|
||||
|
||||
// Include AMDGPU TD files
|
||||
include "R600Schedule.td"
|
||||
include "SISchedule.td"
|
||||
include "Processors.td"
|
||||
include "AMDGPUInstrInfo.td"
|
||||
include "AMDGPUIntrinsics.td"
|
||||
include "AMDGPURegisterInfo.td"
|
||||
include "AMDGPUInstructions.td"
|
|
@ -1,63 +0,0 @@
|
|||
//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass lowers AMDIL machine instructions to the appropriate hardware
|
||||
// instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
||||
#include <stdio.h>
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class AMDGPUConvertToISAPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
TargetMachine &TM;
|
||||
|
||||
public:
|
||||
AMDGPUConvertToISAPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TM(tm) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
|
||||
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char AMDGPUConvertToISAPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
|
||||
return new AMDGPUConvertToISAPass(tm);
|
||||
}
|
||||
|
||||
bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
|
||||
{
|
||||
const AMDGPUInstrInfo * TII =
|
||||
static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
|
@ -1,393 +0,0 @@
|
|||
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This is the parent TargetLowering class for hardware code gen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
AMDILTargetLowering(TM)
|
||||
{
|
||||
// We need to custom lower some of the intrinsics
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||
|
||||
// Library functions. These default to Expand, but we have instructions
|
||||
// for them.
|
||||
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FEXP2, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::f32, Legal);
|
||||
|
||||
setOperationAction(ISD::UDIV, MVT::i32, Expand);
|
||||
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
|
||||
setOperationAction(ISD::UREM, MVT::i32, Expand);
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
const
|
||||
{
|
||||
switch (Op.getOpcode()) {
|
||||
default: return AMDILTargetLowering::LowerOperation(Op, DAG);
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
default: return Op;
|
||||
case AMDGPUIntrinsic::AMDIL_abs:
|
||||
return LowerIntrinsicIABS(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDIL_exp:
|
||||
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDIL_fabs:
|
||||
return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDGPU_lrp:
|
||||
return LowerIntrinsicLRP(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDIL_fraction:
|
||||
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDIL_mad:
|
||||
return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
case AMDGPUIntrinsic::AMDIL_max:
|
||||
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_imax:
|
||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_umax:
|
||||
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDIL_min:
|
||||
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_imin:
|
||||
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_umin:
|
||||
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDIL_round_nearest:
|
||||
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDIL_round_posinf:
|
||||
return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
///IABS(a) = SMAX(sub(0, a), a)
|
||||
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
||||
Op.getOperand(1));
|
||||
|
||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
|
||||
}
|
||||
|
||||
/// Linear Interpolation
|
||||
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
|
||||
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
|
||||
DAG.getConstantFP(1.0f, MVT::f32),
|
||||
Op.getOperand(1));
|
||||
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
|
||||
Op.getOperand(3));
|
||||
return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
OneSubAC);
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
SDValue True = Op.getOperand(2);
|
||||
SDValue False = Op.getOperand(3);
|
||||
SDValue CC = Op.getOperand(4);
|
||||
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
SDValue Temp;
|
||||
|
||||
// LHS and RHS are guaranteed to be the same value type
|
||||
EVT CompareVT = LHS.getValueType();
|
||||
|
||||
// We need all the operands of SELECT_CC to have the same value type, so if
|
||||
// necessary we need to convert LHS and RHS to be the same type True and
|
||||
// False. True and False are guaranteed to have the same type as this
|
||||
// SELECT_CC node.
|
||||
|
||||
if (CompareVT != VT) {
|
||||
ISD::NodeType ConversionOp = ISD::DELETED_NODE;
|
||||
if (VT == MVT::f32 && CompareVT == MVT::i32) {
|
||||
if (isUnsignedIntSetCC(CCOpcode)) {
|
||||
ConversionOp = ISD::UINT_TO_FP;
|
||||
} else {
|
||||
ConversionOp = ISD::SINT_TO_FP;
|
||||
}
|
||||
} else if (VT == MVT::i32 && CompareVT == MVT::f32) {
|
||||
ConversionOp = ISD::FP_TO_SINT;
|
||||
} else {
|
||||
// I don't think there will be any other type pairings.
|
||||
assert(!"Unhandled operand type parings in SELECT_CC");
|
||||
}
|
||||
// XXX Check the value of LHS and RHS and avoid creating sequences like
|
||||
// (FTOI (ITOF))
|
||||
LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
|
||||
RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
|
||||
}
|
||||
|
||||
// If True is a hardware TRUE value and False is a hardware FALSE value or
|
||||
// vice-versa we can handle this with a native instruction (SET* instructions).
|
||||
if ((isHWTrueValue(True) && isHWFalseValue(False))) {
|
||||
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
|
||||
}
|
||||
|
||||
// XXX If True is a hardware TRUE value and False is a hardware FALSE value,
|
||||
// we can handle this with a native instruction, but we need to swap true
|
||||
// and false and change the conditional.
|
||||
if (isHWTrueValue(False) && isHWFalseValue(True)) {
|
||||
}
|
||||
|
||||
// XXX Check if we can lower this to a SELECT or if it is supported by a native
|
||||
// operation. (The code below does this but we don't have the Instruction
|
||||
// selection patterns to do this yet.
|
||||
#if 0
|
||||
if (isZero(LHS) || isZero(RHS)) {
|
||||
SDValue Cond = (isZero(LHS) ? RHS : LHS);
|
||||
bool SwapTF = false;
|
||||
switch (CCOpcode) {
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETUEQ:
|
||||
case ISD::SETEQ:
|
||||
SwapTF = true;
|
||||
// Fall through
|
||||
case ISD::SETONE:
|
||||
case ISD::SETUNE:
|
||||
case ISD::SETNE:
|
||||
// We can lower to select
|
||||
if (SwapTF) {
|
||||
Temp = True;
|
||||
True = False;
|
||||
False = Temp;
|
||||
}
|
||||
// CNDE
|
||||
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
|
||||
default:
|
||||
// Supported by a native operation (CNDGE, CNDGT)
|
||||
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// If we make it this for it means we have no native instructions to handle
|
||||
// this SELECT_CC, so we must lower it.
|
||||
SDValue HWTrue, HWFalse;
|
||||
|
||||
if (VT == MVT::f32) {
|
||||
HWTrue = DAG.getConstantFP(1.0f, VT);
|
||||
HWFalse = DAG.getConstantFP(0.0f, VT);
|
||||
} else if (VT == MVT::i32) {
|
||||
HWTrue = DAG.getConstant(-1, VT);
|
||||
HWFalse = DAG.getConstant(0, VT);
|
||||
}
|
||||
else {
|
||||
assert(!"Unhandled value type in LowerSELECT_CC");
|
||||
}
|
||||
|
||||
// Lower this unsupported SELECT_CC into a combination of two supported
|
||||
// SELECT_CC operations.
|
||||
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
|
||||
|
||||
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
|
||||
}
|
||||
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
SDValue Num = Op.getOperand(0);
|
||||
SDValue Den = Op.getOperand(1);
|
||||
|
||||
SmallVector<SDValue, 8> Results;
|
||||
|
||||
// RCP = URECIP(Den) = 2^32 / Den + e
|
||||
// e is rounding error.
|
||||
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
|
||||
|
||||
// RCP_LO = umulo(RCP, Den) */
|
||||
SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
|
||||
|
||||
// RCP_HI = mulhu (RCP, Den) */
|
||||
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
|
||||
|
||||
// NEG_RCP_LO = -RCP_LO
|
||||
SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
||||
RCP_LO);
|
||||
|
||||
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
|
||||
SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
|
||||
NEG_RCP_LO, RCP_LO,
|
||||
ISD::SETEQ);
|
||||
// Calculate the rounding error from the URECIP instruction
|
||||
// E = mulhu(ABS_RCP_LO, RCP)
|
||||
SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
|
||||
|
||||
// RCP_A_E = RCP + E
|
||||
SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
|
||||
|
||||
// RCP_S_E = RCP - E
|
||||
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
|
||||
|
||||
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
|
||||
SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
|
||||
RCP_A_E, RCP_S_E,
|
||||
ISD::SETEQ);
|
||||
// Quotient = mulhu(Tmp0, Num)
|
||||
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
|
||||
|
||||
// Num_S_Remainder = Quotient * Den
|
||||
SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
|
||||
|
||||
// Remainder = Num - Num_S_Remainder
|
||||
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
|
||||
|
||||
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
|
||||
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
|
||||
DAG.getConstant(-1, VT),
|
||||
DAG.getConstant(0, VT),
|
||||
ISD::SETGE);
|
||||
// Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
|
||||
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
|
||||
DAG.getConstant(0, VT),
|
||||
DAG.getConstant(-1, VT),
|
||||
DAG.getConstant(0, VT),
|
||||
ISD::SETGE);
|
||||
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
|
||||
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
|
||||
Remainder_GE_Zero);
|
||||
|
||||
// Calculate Division result:
|
||||
|
||||
// Quotient_A_One = Quotient + 1
|
||||
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
|
||||
DAG.getConstant(1, VT));
|
||||
|
||||
// Quotient_S_One = Quotient - 1
|
||||
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
|
||||
DAG.getConstant(1, VT));
|
||||
|
||||
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
|
||||
SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
|
||||
Quotient, Quotient_A_One, ISD::SETEQ);
|
||||
|
||||
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
|
||||
Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
|
||||
Quotient_S_One, Div, ISD::SETEQ);
|
||||
|
||||
// Calculate Rem result:
|
||||
|
||||
// Remainder_S_Den = Remainder - Den
|
||||
SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
|
||||
|
||||
// Remainder_A_Den = Remainder + Den
|
||||
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
|
||||
|
||||
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
|
||||
SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
|
||||
Remainder, Remainder_S_Den, ISD::SETEQ);
|
||||
|
||||
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
|
||||
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
|
||||
Remainder_A_Den, Rem, ISD::SETEQ);
|
||||
|
||||
DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
|
||||
DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
|
||||
|
||||
return Op;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
|
||||
{
|
||||
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return CFP->isExactlyValue(1.0);
|
||||
}
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
|
||||
return C->isAllOnesValue();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
|
||||
{
|
||||
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return CFP->getValueAPF().isZero();
|
||||
}
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
|
||||
return C->isNullValue();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
|
||||
MachineFunction * MF, MachineRegisterInfo & MRI,
|
||||
const TargetInstrInfo * TII, unsigned reg) const
|
||||
{
|
||||
AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
|
||||
}
|
||||
|
||||
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
|
||||
|
||||
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
{
|
||||
switch (Opcode) {
|
||||
default: return AMDILTargetLowering::getTargetNodeName(Opcode);
|
||||
|
||||
NODE_NAME_CASE(FRACT)
|
||||
NODE_NAME_CASE(FMAX)
|
||||
NODE_NAME_CASE(SMAX)
|
||||
NODE_NAME_CASE(UMAX)
|
||||
NODE_NAME_CASE(FMIN)
|
||||
NODE_NAME_CASE(SMIN)
|
||||
NODE_NAME_CASE(UMIN)
|
||||
NODE_NAME_CASE(URECIP)
|
||||
}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the interface defintiion of the TargetLowering class
|
||||
// that is common to all AMD GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUISELLOWERING_H
|
||||
#define AMDGPUISELLOWERING_H
|
||||
|
||||
#include "AMDILISelLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetLowering : public AMDILTargetLowering
|
||||
{
|
||||
private:
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
protected:
|
||||
|
||||
/// addLiveIn - This functions adds reg to the live in list of the entry block
|
||||
/// and emits a copy from reg to MI.getOperand(0).
|
||||
///
|
||||
// Some registers are loaded with values before the program
|
||||
/// begins to execute. The loading of these values is modeled with pseudo
|
||||
/// instructions which are lowered using this function.
|
||||
void addLiveIn(MachineInstr * MI, MachineFunction * MF,
|
||||
MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
|
||||
unsigned reg) const;
|
||||
|
||||
bool isHWTrueValue(SDValue Op) const;
|
||||
bool isHWFalseValue(SDValue Op) const;
|
||||
|
||||
public:
|
||||
AMDGPUTargetLowering(TargetMachine &TM);
|
||||
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
|
||||
virtual const char* getTargetNodeName(unsigned Opcode) const;
|
||||
|
||||
};
|
||||
|
||||
namespace AMDGPUISD
|
||||
{
|
||||
|
||||
enum
|
||||
{
|
||||
AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
|
||||
BITALIGN,
|
||||
FRACT,
|
||||
FMAX,
|
||||
SMAX,
|
||||
UMAX,
|
||||
FMIN,
|
||||
SMIN,
|
||||
UMIN,
|
||||
URECIP,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
||||
} // End namespace AMDGPUISD
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUISELLOWERING_H
|
|
@ -1,46 +0,0 @@
|
|||
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the implementation of the TargetInstrInfo class that is
|
||||
// common to all AMD GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
|
||||
: AMDILInstrInfo(tm) { }
|
||||
|
||||
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
DebugLoc DL) const
|
||||
{
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const AMDGPURegisterInfo & RI = getRegisterInfo();
|
||||
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
|
||||
MachineOperand &MO = MI.getOperand(i);
|
||||
// Convert dst regclass to one that is supported by the ISA
|
||||
if (MO.isReg() && MO.isDef()) {
|
||||
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
|
||||
const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
|
||||
const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
|
||||
|
||||
assert(newRegClass);
|
||||
|
||||
MRI.setRegClass(MO.getReg(), newRegClass);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the definition of a TargetInstrInfo class that is common
|
||||
// to all AMD GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUINSTRUCTIONINFO_H_
|
||||
#define AMDGPUINSTRUCTIONINFO_H_
|
||||
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDILInstrInfo.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class MachineInstrBuilder;
|
||||
|
||||
class AMDGPUInstrInfo : public AMDILInstrInfo {
|
||||
|
||||
public:
|
||||
explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
|
||||
|
||||
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
|
||||
|
||||
/// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
|
||||
/// MachineInstr
|
||||
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
DebugLoc DL) const;
|
||||
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // AMDGPUINSTRINFO_H_
|
|
@ -1,69 +0,0 @@
|
|||
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains DAG node defintions for the AMDGPU target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Nodes
|
||||
//
|
||||
|
||||
// out = ((a << 32) | b) >> c)
|
||||
//
|
||||
// Can be used to optimize rtol:
|
||||
// rotl(a, b) = bitalign(a, a, 32 - b)
|
||||
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
|
||||
|
||||
// out = a - floor(a)
|
||||
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
||||
|
||||
// out = max(a, b) a and b are floats
|
||||
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = max(a, b) a and b are signed ints
|
||||
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = max(a, b) a and b are unsigned ints
|
||||
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = min(a, b) a and b are floats
|
||||
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = min(a, b) a snd b are signed ints
|
||||
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = min(a, b) a and b are unsigned ints
|
||||
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// urecip - This operation is a helper for integer division, it returns the
|
||||
// result of 1 / a as a fractional unsigned integer.
|
||||
// out = (2^32 / a) + e
|
||||
// e is rounding error
|
||||
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
|
|
@ -1,123 +0,0 @@
|
|||
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains instruction defs that are common to all hw codegen
|
||||
// targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
|
||||
field bits<16> AMDILOp = 0;
|
||||
field bits<3> Gen = 0;
|
||||
|
||||
let Namespace = "AMDGPU";
|
||||
let OutOperandList = outs;
|
||||
let InOperandList = ins;
|
||||
let AsmString = asm;
|
||||
let Pattern = pattern;
|
||||
let Itinerary = NullALU;
|
||||
let TSFlags{42-40} = Gen;
|
||||
let TSFlags{63-48} = AMDILOp;
|
||||
}
|
||||
|
||||
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
||||
: AMDGPUInst<outs, ins, asm, pattern> {
|
||||
|
||||
field bits<32> Inst = 0xffffffff;
|
||||
|
||||
}
|
||||
|
||||
class Constants {
|
||||
int TWO_PI = 0x40c90fdb;
|
||||
int PI = 0x40490fdb;
|
||||
int TWO_PI_INV = 0x3e22f983;
|
||||
}
|
||||
def CONST : Constants;
|
||||
|
||||
def FP_ZERO : PatLeaf <
|
||||
(fpimm),
|
||||
[{return N->getValueAPF().isZero();}]
|
||||
>;
|
||||
|
||||
def FP_ONE : PatLeaf <
|
||||
(fpimm),
|
||||
[{return N->isExactlyValue(1.0);}]
|
||||
>;
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
|
||||
|
||||
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
(ins rc:$src0),
|
||||
"CLAMP $dst, $src0",
|
||||
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
|
||||
>;
|
||||
|
||||
class FABS <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
(ins rc:$src0),
|
||||
"FABS $dst, $src0",
|
||||
[(set rc:$dst, (fabs rc:$src0))]
|
||||
>;
|
||||
|
||||
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
(ins rc:$src0),
|
||||
"FNEG $dst, $src0",
|
||||
[(set rc:$dst, (fneg rc:$src0))]
|
||||
>;
|
||||
|
||||
} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
|
||||
|
||||
/* Generic helper patterns for intrinsics */
|
||||
/* -------------------------------------- */
|
||||
|
||||
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
|
||||
RegisterClass rc> : Pat <
|
||||
(int_AMDGPU_pow rc:$src0, rc:$src1),
|
||||
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
|
||||
>;
|
||||
|
||||
/* Other helper patterns */
|
||||
/* --------------------- */
|
||||
|
||||
/* Extract element pattern */
|
||||
class Extract_Element <ValueType sub_type, ValueType vec_type,
|
||||
RegisterClass vec_class, int sub_idx,
|
||||
SubRegIndex sub_reg>: Pat<
|
||||
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
|
||||
(EXTRACT_SUBREG vec_class:$src, sub_reg)
|
||||
>;
|
||||
|
||||
/* Insert element pattern */
|
||||
class Insert_Element <ValueType elem_type, ValueType vec_type,
|
||||
RegisterClass elem_class, RegisterClass vec_class,
|
||||
int sub_idx, SubRegIndex sub_reg> : Pat <
|
||||
|
||||
(vec_type (vector_insert (vec_type vec_class:$vec),
|
||||
(elem_type elem_class:$elem), sub_idx)),
|
||||
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
|
||||
>;
|
||||
|
||||
// Vector Build pattern
|
||||
class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
|
||||
(IL_vbuild elemClass:$src),
|
||||
(INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
|
||||
>;
|
||||
|
||||
// bitconvert pattern
|
||||
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
|
||||
(dt (bitconvert (st rc:$src0))),
|
||||
(dt rc:$src0)
|
||||
>;
|
||||
|
||||
include "R600Instructions.td"
|
||||
|
||||
include "SIInstrInfo.td"
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines intrinsics that are used by all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
||||
|
||||
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
|
||||
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
|
||||
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
let TargetPrefix = "TGSI", isTarget = 1 in {
|
||||
|
||||
def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
|
||||
}
|
||||
|
||||
include "SIIntrinsics.td"
|
|
@ -1,24 +0,0 @@
|
|||
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Parent TargetRegisterInfo class common to all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
|
||||
const TargetInstrInfo &tii)
|
||||
: AMDILRegisterInfo(tm, tii),
|
||||
TM(tm),
|
||||
TII(tii)
|
||||
{ }
|
|
@ -1,42 +0,0 @@
|
|||
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the TargetRegisterInfo interface that is implemented
|
||||
// by all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUREGISTERINFO_H_
|
||||
#define AMDGPUREGISTERINFO_H_
|
||||
|
||||
#include "AMDILRegisterInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class TargetInstrInfo;
|
||||
|
||||
struct AMDGPURegisterInfo : public AMDILRegisterInfo
|
||||
{
|
||||
AMDGPUTargetMachine &TM;
|
||||
const TargetInstrInfo &TII;
|
||||
|
||||
AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
|
||||
|
||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
|
||||
|
||||
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
||||
/// ISA reg class that is equivalent to the given AMDIL reg class.
|
||||
virtual const TargetRegisterClass *
|
||||
getISARegClass(const TargetRegisterClass * rc) const = 0;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDIDSAREGISTERINFO_H_
|
|
@ -1,22 +0,0 @@
|
|||
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Tablegen register definitions common to all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Namespace = "AMDGPU" in {
|
||||
def sel_x : SubRegIndex;
|
||||
def sel_y : SubRegIndex;
|
||||
def sel_z : SubRegIndex;
|
||||
def sel_w : SubRegIndex;
|
||||
}
|
||||
|
||||
include "R600RegisterInfo.td"
|
||||
include "SIRegisterInfo.td"
|
|
@ -1,36 +0,0 @@
|
|||
//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the AMDGPU specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _AMDGPUSUBTARGET_H_
|
||||
#define _AMDGPUSUBTARGET_H_
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget : public AMDILSubtarget
|
||||
{
|
||||
InstrItineraryData InstrItins;
|
||||
|
||||
public:
|
||||
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
||||
AMDILSubtarget(TT, CPU, FS)
|
||||
{
|
||||
InstrItins = getInstrItineraryForCPU(CPU);
|
||||
}
|
||||
|
||||
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUSUBTARGET_H_
|
|
@ -1,162 +0,0 @@
|
|||
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The AMDGPU target machine contains all of the hardware specific information
|
||||
// needed to emit code for R600 and SI GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "R600ISelLowering.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "SIISelLowering.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_os_ostream.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||
// Register the target
|
||||
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
|
||||
}
|
||||
|
||||
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
TargetOptions Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OptLevel
|
||||
)
|
||||
:
|
||||
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
|
||||
Subtarget(TT, CPU, FS),
|
||||
DataLayout(Subtarget.getDataLayout()),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||
Subtarget.device()->getStackAlignment(), 0),
|
||||
IntrinsicInfo(this),
|
||||
InstrItins(&Subtarget.getInstrItineraryData()),
|
||||
mDump(false)
|
||||
|
||||
{
|
||||
// TLInfo uses InstrInfo so it must be initialized after.
|
||||
if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
|
||||
InstrInfo = new R600InstrInfo(*this);
|
||||
TLInfo = new R600TargetLowering(*this);
|
||||
} else {
|
||||
InstrInfo = new SIInstrInfo(*this);
|
||||
TLInfo = new SITargetLowering(*this);
|
||||
}
|
||||
}
|
||||
|
||||
AMDGPUTargetMachine::~AMDGPUTargetMachine()
|
||||
{
|
||||
}
|
||||
|
||||
bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
|
||||
formatted_raw_ostream &Out,
|
||||
CodeGenFileType FileType,
|
||||
bool DisableVerify,
|
||||
AnalysisID StartAfter,
|
||||
AnalysisID StopAfter) {
|
||||
// XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
|
||||
// only using it to access addPassesToGenerateCode()
|
||||
bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
|
||||
DisableVerify);
|
||||
assert(fail);
|
||||
|
||||
const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
|
||||
std::string gpu = STM.getDeviceName();
|
||||
if (gpu == "SI") {
|
||||
PM.add(createSICodeEmitterPass(Out));
|
||||
} else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
|
||||
PM.add(createR600CodeEmitterPass(Out));
|
||||
} else {
|
||||
abort();
|
||||
return true;
|
||||
}
|
||||
PM.add(createGCInfoDeleter());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
namespace {
|
||||
class AMDGPUPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
|
||||
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
|
||||
return getTM<AMDGPUTargetMachine>();
|
||||
}
|
||||
|
||||
virtual bool addPreISel();
|
||||
virtual bool addInstSelector();
|
||||
virtual bool addPreRegAlloc();
|
||||
virtual bool addPostRegAlloc();
|
||||
virtual bool addPreSched2();
|
||||
virtual bool addPreEmitPass();
|
||||
};
|
||||
} // End of anonymous namespace
|
||||
|
||||
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
return new AMDGPUPassConfig(this, PM);
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUPassConfig::addPreISel()
|
||||
{
|
||||
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
|
||||
addPass(createR600KernelParametersPass(
|
||||
getAMDGPUTargetMachine().getTargetData()));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addInstSelector() {
|
||||
addPass(createAMDILPeepholeOpt(*TM));
|
||||
addPass(createAMDILISelDag(getAMDGPUTargetMachine()));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
|
||||
|
||||
if (ST.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
|
||||
addPass(createSIAssignInterpRegsPass(*TM));
|
||||
}
|
||||
addPass(createAMDGPUConvertToISAPass(*TM));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPostRegAlloc() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreSched2() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreEmitPass() {
|
||||
addPass(createAMDILCFGPreparationPass(*TM));
|
||||
addPass(createAMDILCFGStructurizerPass(*TM));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The AMDGPU TargetMachine interface definition for hw codgen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_TARGET_MACHINE_H
|
||||
#define AMDGPU_TARGET_MACHINE_H
|
||||
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILFrameLowering.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "R600ISelLowering.h"
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
|
||||
|
||||
class AMDGPUTargetMachine : public LLVMTargetMachine {
|
||||
|
||||
AMDGPUSubtarget Subtarget;
|
||||
const TargetData DataLayout;
|
||||
AMDILFrameLowering FrameLowering;
|
||||
AMDILIntrinsicInfo IntrinsicInfo;
|
||||
const AMDGPUInstrInfo * InstrInfo;
|
||||
AMDGPUTargetLowering * TLInfo;
|
||||
const InstrItineraryData* InstrItins;
|
||||
bool mDump;
|
||||
|
||||
public:
|
||||
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
|
||||
StringRef CPU,
|
||||
TargetOptions Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL);
|
||||
~AMDGPUTargetMachine();
|
||||
virtual const AMDILFrameLowering* getFrameLowering() const {
|
||||
return &FrameLowering;
|
||||
}
|
||||
virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const {
|
||||
return &IntrinsicInfo;
|
||||
}
|
||||
virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
|
||||
virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
|
||||
virtual const AMDGPURegisterInfo *getRegisterInfo() const {
|
||||
return &InstrInfo->getRegisterInfo();
|
||||
}
|
||||
virtual AMDGPUTargetLowering * getTargetLowering() const {
|
||||
return TLInfo;
|
||||
}
|
||||
virtual const InstrItineraryData* getInstrItineraryData() const {
|
||||
return InstrItins;
|
||||
}
|
||||
virtual const TargetData* getTargetData() const { return &DataLayout; }
|
||||
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
||||
virtual bool addPassesToEmitFile(PassManagerBase &PM,
|
||||
formatted_raw_ostream &Out,
|
||||
CodeGenFileType FileType,
|
||||
bool DisableVerify,
|
||||
AnalysisID StartAfter = 0,
|
||||
AnalysisID StopAfter = 0);
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPU_TARGET_MACHINE_H
|
|
@ -1,139 +0,0 @@
|
|||
//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Common utility functions used by hw codegen targets
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Some instructions act as place holders to emulate operations that the GPU
|
||||
// hardware does automatically. This function can be used to check if
|
||||
// an opcode falls into this category.
|
||||
bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::RETURN:
|
||||
case AMDGPU::LOAD_INPUT:
|
||||
case AMDGPU::LAST:
|
||||
case AMDGPU::MASK_WRITE:
|
||||
case AMDGPU::RESERVE_REG:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPU::isTransOp(unsigned opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
|
||||
case AMDGPU::COS_r600:
|
||||
case AMDGPU::COS_eg:
|
||||
case AMDGPU::MULLIT:
|
||||
case AMDGPU::MUL_LIT_r600:
|
||||
case AMDGPU::MUL_LIT_eg:
|
||||
case AMDGPU::EXP_IEEE_r600:
|
||||
case AMDGPU::EXP_IEEE_eg:
|
||||
case AMDGPU::LOG_CLAMPED_r600:
|
||||
case AMDGPU::LOG_IEEE_r600:
|
||||
case AMDGPU::LOG_CLAMPED_eg:
|
||||
case AMDGPU::LOG_IEEE_eg:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPU::isTexOp(unsigned opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::TEX_LD:
|
||||
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
|
||||
case AMDGPU::TEX_SAMPLE:
|
||||
case AMDGPU::TEX_SAMPLE_C:
|
||||
case AMDGPU::TEX_SAMPLE_L:
|
||||
case AMDGPU::TEX_SAMPLE_C_L:
|
||||
case AMDGPU::TEX_SAMPLE_LB:
|
||||
case AMDGPU::TEX_SAMPLE_C_LB:
|
||||
case AMDGPU::TEX_SAMPLE_G:
|
||||
case AMDGPU::TEX_SAMPLE_C_G:
|
||||
case AMDGPU::TEX_GET_GRADIENTS_H:
|
||||
case AMDGPU::TEX_GET_GRADIENTS_V:
|
||||
case AMDGPU::TEX_SET_GRADIENTS_H:
|
||||
case AMDGPU::TEX_SET_GRADIENTS_V:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPU::isReductionOp(unsigned opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::DOT4_r600:
|
||||
case AMDGPU::DOT4_eg:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPU::isCubeOp(unsigned opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::CUBE_r600:
|
||||
case AMDGPU::CUBE_eg:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool AMDGPU::isFCOp(unsigned opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::BREAK_LOGICALZ_f32:
|
||||
case AMDGPU::BREAK_LOGICALNZ_i32:
|
||||
case AMDGPU::BREAK_LOGICALZ_i32:
|
||||
case AMDGPU::BREAK_LOGICALNZ_f32:
|
||||
case AMDGPU::CONTINUE_LOGICALNZ_f32:
|
||||
case AMDGPU::IF_LOGICALNZ_i32:
|
||||
case AMDGPU::IF_LOGICALZ_f32:
|
||||
case AMDGPU::ELSE:
|
||||
case AMDGPU::ENDIF:
|
||||
case AMDGPU::ENDLOOP:
|
||||
case AMDGPU::IF_LOGICALNZ_f32:
|
||||
case AMDGPU::WHILELOOP:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPU::utilAddLiveIn(MachineFunction * MF,
|
||||
MachineRegisterInfo & MRI,
|
||||
const TargetInstrInfo * TII,
|
||||
unsigned physReg, unsigned virtReg)
|
||||
{
|
||||
if (!MRI.isLiveIn(physReg)) {
|
||||
MRI.addLiveIn(physReg, virtReg);
|
||||
MF->front().addLiveIn(physReg);
|
||||
BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), virtReg)
|
||||
.addReg(physReg);
|
||||
} else {
|
||||
MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
|
||||
}
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Declarations for utility functions common to all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_UTIL_H
|
||||
#define AMDGPU_UTIL_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineFunction;
|
||||
class MachineRegisterInfo;
|
||||
class TargetInstrInfo;
|
||||
|
||||
namespace AMDGPU {
|
||||
|
||||
bool isPlaceHolderOpcode(unsigned opcode);
|
||||
|
||||
bool isTransOp(unsigned opcode);
|
||||
bool isTexOp(unsigned opcode);
|
||||
bool isReductionOp(unsigned opcode);
|
||||
bool isCubeOp(unsigned opcode);
|
||||
bool isFCOp(unsigned opcode);
|
||||
|
||||
// XXX: Move these to AMDGPUInstrInfo.h
|
||||
#define MO_FLAG_CLAMP (1 << 0)
|
||||
#define MO_FLAG_NEG (1 << 1)
|
||||
#define MO_FLAG_ABS (1 << 2)
|
||||
#define MO_FLAG_MASK (1 << 3)
|
||||
|
||||
void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
|
||||
const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
|
||||
|
||||
} // End namespace AMDGPU
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPU_UTIL_H
|
|
@ -1,251 +0,0 @@
|
|||
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the entry points for global functions defined in the LLVM
|
||||
// AMDIL back-end.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDIL_H_
|
||||
#define AMDIL_H_
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#define AMDIL_MAJOR_VERSION 2
|
||||
#define AMDIL_MINOR_VERSION 0
|
||||
#define AMDIL_REVISION_NUMBER 74
|
||||
#define ARENA_SEGMENT_RESERVED_UAVS 12
|
||||
#define DEFAULT_ARENA_UAV_ID 8
|
||||
#define DEFAULT_RAW_UAV_ID 7
|
||||
#define GLOBAL_RETURN_RAW_UAV_ID 11
|
||||
#define HW_MAX_NUM_CB 8
|
||||
#define MAX_NUM_UNIQUE_UAVS 8
|
||||
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
|
||||
#define OPENCL_MAX_READ_IMAGES 128
|
||||
#define OPENCL_MAX_WRITE_IMAGES 8
|
||||
#define OPENCL_MAX_SAMPLERS 16
|
||||
|
||||
// The next two values can never be zero, as zero is the ID that is
|
||||
// used to assert against.
|
||||
#define DEFAULT_LDS_ID 1
|
||||
#define DEFAULT_GDS_ID 1
|
||||
#define DEFAULT_SCRATCH_ID 1
|
||||
#define DEFAULT_VEC_SLOTS 8
|
||||
|
||||
// SC->CAL version matchings.
|
||||
#define CAL_VERSION_SC_150 1700
|
||||
#define CAL_VERSION_SC_149 1700
|
||||
#define CAL_VERSION_SC_148 1525
|
||||
#define CAL_VERSION_SC_147 1525
|
||||
#define CAL_VERSION_SC_146 1525
|
||||
#define CAL_VERSION_SC_145 1451
|
||||
#define CAL_VERSION_SC_144 1451
|
||||
#define CAL_VERSION_SC_143 1441
|
||||
#define CAL_VERSION_SC_142 1441
|
||||
#define CAL_VERSION_SC_141 1420
|
||||
#define CAL_VERSION_SC_140 1400
|
||||
#define CAL_VERSION_SC_139 1387
|
||||
#define CAL_VERSION_SC_138 1387
|
||||
#define CAL_APPEND_BUFFER_SUPPORT 1340
|
||||
#define CAL_VERSION_SC_137 1331
|
||||
#define CAL_VERSION_SC_136 982
|
||||
#define CAL_VERSION_SC_135 950
|
||||
#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
|
||||
|
||||
#define OCL_DEVICE_RV710 0x0001
|
||||
#define OCL_DEVICE_RV730 0x0002
|
||||
#define OCL_DEVICE_RV770 0x0004
|
||||
#define OCL_DEVICE_CEDAR 0x0008
|
||||
#define OCL_DEVICE_REDWOOD 0x0010
|
||||
#define OCL_DEVICE_JUNIPER 0x0020
|
||||
#define OCL_DEVICE_CYPRESS 0x0040
|
||||
#define OCL_DEVICE_CAICOS 0x0080
|
||||
#define OCL_DEVICE_TURKS 0x0100
|
||||
#define OCL_DEVICE_BARTS 0x0200
|
||||
#define OCL_DEVICE_CAYMAN 0x0400
|
||||
#define OCL_DEVICE_ALL 0x3FFF
|
||||
|
||||
/// The number of function ID's that are reserved for
|
||||
/// internal compiler usage.
|
||||
const unsigned int RESERVED_FUNCS = 1024;
|
||||
|
||||
#define AMDIL_OPT_LEVEL_DECL
|
||||
#define AMDIL_OPT_LEVEL_VAR
|
||||
#define AMDIL_OPT_LEVEL_VAR_NO_COMMA
|
||||
|
||||
namespace llvm {
|
||||
class AMDILInstrPrinter;
|
||||
class FunctionPass;
|
||||
class MCAsmInfo;
|
||||
class raw_ostream;
|
||||
class Target;
|
||||
class TargetMachine;
|
||||
|
||||
/// Instruction selection passes.
|
||||
FunctionPass*
|
||||
createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||
FunctionPass*
|
||||
createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||
|
||||
/// Pre emit passes.
|
||||
FunctionPass*
|
||||
createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||
FunctionPass*
|
||||
createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||
|
||||
extern Target TheAMDILTarget;
|
||||
extern Target TheAMDGPUTarget;
|
||||
} // end namespace llvm;
|
||||
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
/// Include device information enumerations
|
||||
#include "AMDILDeviceInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
/// OpenCL uses address spaces to differentiate between
|
||||
/// various memory regions on the hardware. On the CPU
|
||||
/// all of the address spaces point to the same memory,
|
||||
/// however on the GPU, each address space points to
|
||||
/// a seperate piece of memory that is unique from other
|
||||
/// memory locations.
|
||||
namespace AMDILAS {
|
||||
enum AddressSpaces {
|
||||
PRIVATE_ADDRESS = 0, // Address space for private memory.
|
||||
GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
|
||||
CONSTANT_ADDRESS = 2, // Address space for constant memory.
|
||||
LOCAL_ADDRESS = 3, // Address space for local memory.
|
||||
REGION_ADDRESS = 4, // Address space for region memory.
|
||||
ADDRESS_NONE = 5, // Address space for unknown memory.
|
||||
PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
|
||||
PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
|
||||
USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
|
||||
LAST_ADDRESS = 9
|
||||
};
|
||||
|
||||
// This union/struct combination is an easy way to read out the
|
||||
// exact bits that are needed.
|
||||
typedef union ResourceRec {
|
||||
struct {
|
||||
#ifdef __BIG_ENDIAN__
|
||||
unsigned short isImage : 1; // Reserved for future use/llvm.
|
||||
unsigned short ResourceID : 10; // Flag to specify the resourece ID for
|
||||
// the op.
|
||||
unsigned short HardwareInst : 1; // Flag to specify that this instruction
|
||||
// is a hardware instruction.
|
||||
unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
|
||||
// conflict.
|
||||
unsigned short ByteStore : 1; // Flag to specify if the op is a byte
|
||||
// store op.
|
||||
unsigned short PointerPath : 1; // Flag to specify if the op is on the
|
||||
// pointer path.
|
||||
unsigned short CacheableRead : 1; // Flag to specify if the read is
|
||||
// cacheable.
|
||||
#else
|
||||
unsigned short CacheableRead : 1; // Flag to specify if the read is
|
||||
// cacheable.
|
||||
unsigned short PointerPath : 1; // Flag to specify if the op is on the
|
||||
// pointer path.
|
||||
unsigned short ByteStore : 1; // Flag to specify if the op is byte
|
||||
// store op.
|
||||
unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
|
||||
// a conflict.
|
||||
unsigned short HardwareInst : 1; // Flag to specify that this instruction
|
||||
// is a hardware instruction.
|
||||
unsigned short ResourceID : 10; // Flag to specify the resource ID for
|
||||
// the op.
|
||||
unsigned short isImage : 1; // Reserved for future use.
|
||||
#endif
|
||||
} bits;
|
||||
unsigned short u16all;
|
||||
} InstrResEnc;
|
||||
|
||||
} // namespace AMDILAS
|
||||
|
||||
// Enums corresponding to AMDIL condition codes for IL. These
|
||||
// values must be kept in sync with the ones in the .td file.
|
||||
namespace AMDILCC {
|
||||
enum CondCodes {
|
||||
// AMDIL specific condition codes. These correspond to the IL_CC_*
|
||||
// in AMDILInstrInfo.td and must be kept in the same order.
|
||||
IL_CC_D_EQ = 0, // DEQ instruction.
|
||||
IL_CC_D_GE = 1, // DGE instruction.
|
||||
IL_CC_D_LT = 2, // DLT instruction.
|
||||
IL_CC_D_NE = 3, // DNE instruction.
|
||||
IL_CC_F_EQ = 4, // EQ instruction.
|
||||
IL_CC_F_GE = 5, // GE instruction.
|
||||
IL_CC_F_LT = 6, // LT instruction.
|
||||
IL_CC_F_NE = 7, // NE instruction.
|
||||
IL_CC_I_EQ = 8, // IEQ instruction.
|
||||
IL_CC_I_GE = 9, // IGE instruction.
|
||||
IL_CC_I_LT = 10, // ILT instruction.
|
||||
IL_CC_I_NE = 11, // INE instruction.
|
||||
IL_CC_U_GE = 12, // UGE instruction.
|
||||
IL_CC_U_LT = 13, // ULE instruction.
|
||||
// Pseudo IL Comparison instructions here.
|
||||
IL_CC_F_GT = 14, // GT instruction.
|
||||
IL_CC_U_GT = 15,
|
||||
IL_CC_I_GT = 16,
|
||||
IL_CC_D_GT = 17,
|
||||
IL_CC_F_LE = 18, // LE instruction
|
||||
IL_CC_U_LE = 19,
|
||||
IL_CC_I_LE = 20,
|
||||
IL_CC_D_LE = 21,
|
||||
IL_CC_F_UNE = 22,
|
||||
IL_CC_F_UEQ = 23,
|
||||
IL_CC_F_ULT = 24,
|
||||
IL_CC_F_UGT = 25,
|
||||
IL_CC_F_ULE = 26,
|
||||
IL_CC_F_UGE = 27,
|
||||
IL_CC_F_ONE = 28,
|
||||
IL_CC_F_OEQ = 29,
|
||||
IL_CC_F_OLT = 30,
|
||||
IL_CC_F_OGT = 31,
|
||||
IL_CC_F_OLE = 32,
|
||||
IL_CC_F_OGE = 33,
|
||||
IL_CC_D_UNE = 34,
|
||||
IL_CC_D_UEQ = 35,
|
||||
IL_CC_D_ULT = 36,
|
||||
IL_CC_D_UGT = 37,
|
||||
IL_CC_D_ULE = 38,
|
||||
IL_CC_D_UGE = 39,
|
||||
IL_CC_D_ONE = 40,
|
||||
IL_CC_D_OEQ = 41,
|
||||
IL_CC_D_OLT = 42,
|
||||
IL_CC_D_OGT = 43,
|
||||
IL_CC_D_OLE = 44,
|
||||
IL_CC_D_OGE = 45,
|
||||
IL_CC_U_EQ = 46,
|
||||
IL_CC_U_NE = 47,
|
||||
IL_CC_F_O = 48,
|
||||
IL_CC_D_O = 49,
|
||||
IL_CC_F_UO = 50,
|
||||
IL_CC_D_UO = 51,
|
||||
IL_CC_L_LE = 52,
|
||||
IL_CC_L_GE = 53,
|
||||
IL_CC_L_EQ = 54,
|
||||
IL_CC_L_NE = 55,
|
||||
IL_CC_L_LT = 56,
|
||||
IL_CC_L_GT = 57,
|
||||
IL_CC_UL_LE = 58,
|
||||
IL_CC_UL_GE = 59,
|
||||
IL_CC_UL_EQ = 60,
|
||||
IL_CC_UL_NE = 61,
|
||||
IL_CC_UL_LT = 62,
|
||||
IL_CC_UL_GT = 63,
|
||||
COND_ERROR = 64
|
||||
};
|
||||
|
||||
} // end namespace AMDILCC
|
||||
} // end namespace llvm
|
||||
#endif // AMDIL_H_
|
|
@ -1,128 +0,0 @@
|
|||
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDIL7XXDevice.h"
|
||||
#include "AMDILDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
|
||||
{
|
||||
setCaps();
|
||||
std::string name = mSTM->getDeviceName();
|
||||
if (name == "rv710") {
|
||||
mDeviceFlag = OCL_DEVICE_RV710;
|
||||
} else if (name == "rv730") {
|
||||
mDeviceFlag = OCL_DEVICE_RV730;
|
||||
} else {
|
||||
mDeviceFlag = OCL_DEVICE_RV770;
|
||||
}
|
||||
}
|
||||
|
||||
AMDIL7XXDevice::~AMDIL7XXDevice()
|
||||
{
|
||||
}
|
||||
|
||||
void AMDIL7XXDevice::setCaps()
|
||||
{
|
||||
mSWBits.set(AMDILDeviceInfo::LocalMem);
|
||||
}
|
||||
|
||||
size_t AMDIL7XXDevice::getMaxLDSSize() const
|
||||
{
|
||||
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_700;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDIL7XXDevice::getWavefrontSize() const
|
||||
{
|
||||
return AMDILDevice::HalfWavefrontSize;
|
||||
}
|
||||
|
||||
uint32_t AMDIL7XXDevice::getGeneration() const
|
||||
{
|
||||
return AMDILDeviceInfo::HD4XXX;
|
||||
}
|
||||
|
||||
uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
|
||||
{
|
||||
switch (DeviceID) {
|
||||
default:
|
||||
assert(0 && "ID type passed in is unknown!");
|
||||
break;
|
||||
case GLOBAL_ID:
|
||||
case CONSTANT_ID:
|
||||
case RAW_UAV_ID:
|
||||
case ARENA_UAV_ID:
|
||||
break;
|
||||
case LDS_ID:
|
||||
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||
return DEFAULT_LDS_ID;
|
||||
}
|
||||
break;
|
||||
case SCRATCH_ID:
|
||||
if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
|
||||
return DEFAULT_SCRATCH_ID;
|
||||
}
|
||||
break;
|
||||
case GDS_ID:
|
||||
assert(0 && "GDS UAV ID is not supported on this chip");
|
||||
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
|
||||
return DEFAULT_GDS_ID;
|
||||
}
|
||||
break;
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
|
||||
{
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDIL770Device::~AMDIL770Device()
|
||||
{
|
||||
}
|
||||
|
||||
void AMDIL770Device::setCaps()
|
||||
{
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
|
||||
mSWBits.set(AMDILDeviceInfo::FMA);
|
||||
mHWBits.set(AMDILDeviceInfo::DoubleOps);
|
||||
}
|
||||
mSWBits.set(AMDILDeviceInfo::BarrierDetect);
|
||||
mHWBits.reset(AMDILDeviceInfo::LongOps);
|
||||
mSWBits.set(AMDILDeviceInfo::LongOps);
|
||||
mSWBits.set(AMDILDeviceInfo::LocalMem);
|
||||
}
|
||||
|
||||
size_t AMDIL770Device::getWavefrontSize() const
|
||||
{
|
||||
return AMDILDevice::WavefrontSize;
|
||||
}
|
||||
|
||||
AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
|
||||
{
|
||||
}
|
||||
|
||||
AMDIL710Device::~AMDIL710Device()
|
||||
{
|
||||
}
|
||||
|
||||
size_t AMDIL710Device::getWavefrontSize() const
|
||||
{
|
||||
return AMDILDevice::QuarterWavefrontSize;
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDIL7XXDEVICEIMPL_H_
|
||||
#define _AMDIL7XXDEVICEIMPL_H_
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDILSubtarget;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 7XX generation of devices and their respective sub classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
|
||||
// devices are derived from this class. The AMDIL7XX device will only
|
||||
// support the minimal features that are required to be considered OpenCL 1.0
|
||||
// compliant and nothing more.
|
||||
class AMDIL7XXDevice : public AMDILDevice {
|
||||
public:
|
||||
AMDIL7XXDevice(AMDILSubtarget *ST);
|
||||
virtual ~AMDIL7XXDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual size_t getWavefrontSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual uint32_t getResourceID(uint32_t DeviceID) const;
|
||||
virtual uint32_t getMaxNumUAVs() const;
|
||||
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
}; // AMDIL7XXDevice
|
||||
|
||||
// The AMDIL770Device class represents the RV770 chip and it's
|
||||
// derivative cards. The difference between this device and the base
|
||||
// class is this device device adds support for double precision
|
||||
// and has a larger wavefront size.
|
||||
class AMDIL770Device : public AMDIL7XXDevice {
|
||||
public:
|
||||
AMDIL770Device(AMDILSubtarget *ST);
|
||||
virtual ~AMDIL770Device();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDIL770Device
|
||||
|
||||
// The AMDIL710Device class derives from the 7XX base class, but this
|
||||
// class is a smaller derivative, so we need to overload some of the
|
||||
// functions in order to correctly specify this information.
|
||||
class AMDIL710Device : public AMDIL7XXDevice {
|
||||
public:
|
||||
AMDIL710Device(AMDILSubtarget *ST);
|
||||
virtual ~AMDIL710Device();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
}; // AMDIL710Device
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILDEVICEIMPL_H_
|
|
@ -1,93 +0,0 @@
|
|||
//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides templates algorithms that extend the STL algorithms, but
|
||||
// are useful for the AMDIL backend
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// A template function that loops through the iterators and passes the second
|
||||
// argument along with each iterator to the function. If the function returns
|
||||
// true, then the current iterator is invalidated and it moves back, before
|
||||
// moving forward to the next iterator, otherwise it moves forward without
|
||||
// issue. This is based on the for_each STL function, but allows a reference to
|
||||
// the second argument
|
||||
template<class InputIterator, class Function, typename Arg>
|
||||
Function binaryForEach(InputIterator First, InputIterator Last, Function F,
|
||||
Arg &Second)
|
||||
{
|
||||
for ( ; First!=Last; ++First ) {
|
||||
F(*First, Second);
|
||||
}
|
||||
return F;
|
||||
}
|
||||
|
||||
template<class InputIterator, class Function, typename Arg>
|
||||
Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
|
||||
Arg &Second)
|
||||
{
|
||||
for ( ; First!=Last; ++First ) {
|
||||
if (F(*First, Second)) {
|
||||
--First;
|
||||
}
|
||||
}
|
||||
return F;
|
||||
}
|
||||
|
||||
// A template function that has two levels of looping before calling the
|
||||
// function with the passed in argument. See binaryForEach for further
|
||||
// explanation
|
||||
template<class InputIterator, class Function, typename Arg>
|
||||
Function binaryNestedForEach(InputIterator First, InputIterator Last,
|
||||
Function F, Arg &Second)
|
||||
{
|
||||
for ( ; First != Last; ++First) {
|
||||
binaryForEach(First->begin(), First->end(), F, Second);
|
||||
}
|
||||
return F;
|
||||
}
|
||||
template<class InputIterator, class Function, typename Arg>
|
||||
Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
|
||||
Function F, Arg &Second)
|
||||
{
|
||||
for ( ; First != Last; ++First) {
|
||||
safeBinaryForEach(First->begin(), First->end(), F, Second);
|
||||
}
|
||||
return F;
|
||||
}
|
||||
|
||||
// Unlike the STL, a pointer to the iterator itself is passed in with the 'safe'
|
||||
// versions of these functions This allows the function to handle situations
|
||||
// such as invalidated iterators
|
||||
template<class InputIterator, class Function>
|
||||
Function safeForEach(InputIterator First, InputIterator Last, Function F)
|
||||
{
|
||||
for ( ; First!=Last; ++First ) F(&First)
|
||||
; // Do nothing.
|
||||
return F;
|
||||
}
|
||||
|
||||
// A template function that has two levels of looping before calling the
|
||||
// function with a pointer to the current iterator. See binaryForEach for
|
||||
// further explanation
|
||||
template<class InputIterator, class SecondIterator, class Function>
|
||||
Function safeNestedForEach(InputIterator First, InputIterator Last,
|
||||
SecondIterator S, Function F)
|
||||
{
|
||||
for ( ; First != Last; ++First) {
|
||||
SecondIterator sf, sl;
|
||||
for (sf = First->begin(), sl = First->end();
|
||||
sf != sl; ) {
|
||||
if (!F(&sf)) {
|
||||
++sf;
|
||||
}
|
||||
}
|
||||
}
|
||||
return F;
|
||||
}
|
|
@ -1,113 +0,0 @@
|
|||
//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target-independent interfaces which we are implementing
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/Target.td"
|
||||
|
||||
// Dummy Instruction itineraries for pseudo instructions
|
||||
def ALU_NULL : FuncUnit;
|
||||
def NullALU : InstrItinClass;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDIL Subtarget features.
|
||||
//===----------------------------------------------------------------------===//
|
||||
def FeatureFP64 : SubtargetFeature<"fp64",
|
||||
"CapsOverride[AMDILDeviceInfo::DoubleOps]",
|
||||
"true",
|
||||
"Enable 64bit double precision operations">;
|
||||
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
|
||||
"CapsOverride[AMDILDeviceInfo::ByteStores]",
|
||||
"true",
|
||||
"Enable byte addressable stores">;
|
||||
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
|
||||
"CapsOverride[AMDILDeviceInfo::BarrierDetect]",
|
||||
"true",
|
||||
"Enable duplicate barrier detection(HD5XXX or later).">;
|
||||
def FeatureImages : SubtargetFeature<"images",
|
||||
"CapsOverride[AMDILDeviceInfo::Images]",
|
||||
"true",
|
||||
"Enable image functions">;
|
||||
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
|
||||
"CapsOverride[AMDILDeviceInfo::MultiUAV]",
|
||||
"true",
|
||||
"Generate multiple UAV code(HD5XXX family or later)">;
|
||||
def FeatureMacroDB : SubtargetFeature<"macrodb",
|
||||
"CapsOverride[AMDILDeviceInfo::MacroDB]",
|
||||
"true",
|
||||
"Use internal macrodb, instead of macrodb in driver">;
|
||||
def FeatureNoAlias : SubtargetFeature<"noalias",
|
||||
"CapsOverride[AMDILDeviceInfo::NoAlias]",
|
||||
"true",
|
||||
"assert that all kernel argument pointers are not aliased">;
|
||||
def FeatureNoInline : SubtargetFeature<"no-inline",
|
||||
"CapsOverride[AMDILDeviceInfo::NoInline]",
|
||||
"true",
|
||||
"specify whether to not inline functions">;
|
||||
|
||||
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
|
||||
"mIs64bit",
|
||||
"false",
|
||||
"Specify if 64bit addressing should be used.">;
|
||||
|
||||
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
|
||||
"mIs32on64bit",
|
||||
"false",
|
||||
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
|
||||
def FeatureDebug : SubtargetFeature<"debug",
|
||||
"CapsOverride[AMDILDeviceInfo::Debug]",
|
||||
"true",
|
||||
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
|
||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||
"mDumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter">;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File, Calling Conv, Instruction Descriptions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
include "AMDILRegisterInfo.td"
|
||||
include "AMDILCallingConv.td"
|
||||
include "AMDILInstrInfo.td"
|
||||
|
||||
def AMDILInstrInfo : InstrInfo {}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDIL processors supported.
|
||||
//===----------------------------------------------------------------------===//
|
||||
//include "Processors.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declare the target which we are implementing
|
||||
//===----------------------------------------------------------------------===//
|
||||
def AMDILAsmWriter : AsmWriter {
|
||||
string AsmWriterClassName = "AsmPrinter";
|
||||
int Variant = 0;
|
||||
}
|
||||
|
||||
def AMDILAsmParser : AsmParser {
|
||||
string AsmParserClassName = "AsmParser";
|
||||
int Variant = 0;
|
||||
|
||||
string CommentDelimiter = ";";
|
||||
|
||||
string RegisterPrefix = "r";
|
||||
|
||||
}
|
||||
|
||||
|
||||
def AMDIL : Target {
|
||||
// Pull in Instruction Info:
|
||||
let InstructionSet = AMDILInstrInfo;
|
||||
let AssemblyWriters = [AMDILAsmWriter];
|
||||
let AssemblyParsers = [AMDILAsmParser];
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,42 +0,0 @@
|
|||
//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the calling conventions for the AMDIL architectures.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Return Value Calling Conventions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// AMDIL 32-bit C return-value convention.
|
||||
def RetCC_AMDIL32 : CallingConv<[
|
||||
// Since IL has no return values, all values can be emulated on the stack
|
||||
// The stack can then be mapped to a number of sequential virtual registers
|
||||
// in IL
|
||||
|
||||
// Integer and FP scalar values get put on the stack at 16-byte alignment
|
||||
// but with a size of 4 bytes
|
||||
CCIfType<[i32, f32], CCAssignToReg<
|
||||
[
|
||||
R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
|
||||
]> >, CCAssignToStack<16, 16>]>;
|
||||
|
||||
// AMDIL 32-bit C Calling convention.
|
||||
def CC_AMDIL32 : CallingConv<[
|
||||
// Since IL has parameter values, all values can be emulated on the stack
|
||||
// The stack can then be mapped to a number of sequential virtual registers
|
||||
// in IL
|
||||
// Integer and FP scalar values get put on the stack at 16-byte alignment
|
||||
// but with a size of 4 bytes
|
||||
// Integer and FP scalar values get put on the stack at 16-byte alignment
|
||||
// but with a size of 4 bytes
|
||||
CCIfType<[i32, f32], CCAssignToReg<
|
||||
[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
|
||||
]> >, CCAssignToStack<16, 16>]>;
|
|
@ -1,48 +0,0 @@
|
|||
//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// CodeEmitter interface for R600 and SI codegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDILCODEEMITTER_H
|
||||
#define AMDILCODEEMITTER_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDILCodeEmitter {
|
||||
public:
|
||||
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
|
||||
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
||||
const MachineOperand &MO) const { return 0; }
|
||||
virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
|
||||
uint64_t Value) const {
|
||||
return Value;
|
||||
}
|
||||
virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
||||
const {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDILCODEEMITTER_H
|
|
@ -1,137 +0,0 @@
|
|||
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
// Default implementation for all of the classes.
|
||||
AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST)
|
||||
{
|
||||
mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
|
||||
mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
|
||||
setCaps();
|
||||
mDeviceFlag = OCL_DEVICE_ALL;
|
||||
}
|
||||
|
||||
AMDILDevice::~AMDILDevice()
|
||||
{
|
||||
mHWBits.clear();
|
||||
mSWBits.clear();
|
||||
}
|
||||
|
||||
size_t AMDILDevice::getMaxGDSSize() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDILDevice::getDeviceFlag() const
|
||||
{
|
||||
return mDeviceFlag;
|
||||
}
|
||||
|
||||
size_t AMDILDevice::getMaxNumCBs() const
|
||||
{
|
||||
if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
|
||||
return HW_MAX_NUM_CB;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDILDevice::getMaxCBSize() const
|
||||
{
|
||||
if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
|
||||
return MAX_CB_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDILDevice::getMaxScratchSize() const
|
||||
{
|
||||
return 65536;
|
||||
}
|
||||
|
||||
uint32_t AMDILDevice::getStackAlignment() const
|
||||
{
|
||||
return 16;
|
||||
}
|
||||
|
||||
void AMDILDevice::setCaps()
|
||||
{
|
||||
mSWBits.set(AMDILDeviceInfo::HalfOps);
|
||||
mSWBits.set(AMDILDeviceInfo::ByteOps);
|
||||
mSWBits.set(AMDILDeviceInfo::ShortOps);
|
||||
mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
|
||||
mSWBits.set(AMDILDeviceInfo::NoInline);
|
||||
}
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
|
||||
mSWBits.set(AMDILDeviceInfo::MacroDB);
|
||||
}
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDILDeviceInfo::ConstantMem);
|
||||
} else {
|
||||
mHWBits.set(AMDILDeviceInfo::ConstantMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDILDeviceInfo::PrivateMem);
|
||||
} else {
|
||||
mHWBits.set(AMDILDeviceInfo::PrivateMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
|
||||
mSWBits.set(AMDILDeviceInfo::BarrierDetect);
|
||||
}
|
||||
mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
|
||||
mSWBits.set(AMDILDeviceInfo::LongOps);
|
||||
}
|
||||
|
||||
AMDILDeviceInfo::ExecutionMode
|
||||
AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
|
||||
{
|
||||
if (mHWBits[Caps]) {
|
||||
assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
|
||||
return AMDILDeviceInfo::Hardware;
|
||||
}
|
||||
|
||||
if (mSWBits[Caps]) {
|
||||
assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
|
||||
return AMDILDeviceInfo::Software;
|
||||
}
|
||||
|
||||
return AMDILDeviceInfo::Unsupported;
|
||||
|
||||
}
|
||||
|
||||
bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
|
||||
{
|
||||
return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
|
||||
}
|
||||
|
||||
bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
|
||||
{
|
||||
return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
|
||||
}
|
||||
|
||||
bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
|
||||
{
|
||||
return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDILDevice::getDataLayout() const
|
||||
{
|
||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
"-n8:16:32:64");
|
||||
}
|
|
@ -1,116 +0,0 @@
|
|||
//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILDEVICEIMPL_H_
|
||||
#define _AMDILDEVICEIMPL_H_
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDILSubtarget;
|
||||
class MCStreamer;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Interface for data that is specific to a single device
|
||||
//===----------------------------------------------------------------------===//
|
||||
class AMDILDevice {
|
||||
public:
|
||||
AMDILDevice(AMDILSubtarget *ST);
|
||||
virtual ~AMDILDevice();
|
||||
|
||||
// Enum values for the various memory types.
|
||||
enum {
|
||||
RAW_UAV_ID = 0,
|
||||
ARENA_UAV_ID = 1,
|
||||
LDS_ID = 2,
|
||||
GDS_ID = 3,
|
||||
SCRATCH_ID = 4,
|
||||
CONSTANT_ID = 5,
|
||||
GLOBAL_ID = 6,
|
||||
MAX_IDS = 7
|
||||
} IO_TYPE_IDS;
|
||||
|
||||
// Returns the max LDS size that the hardware supports. Size is in
|
||||
// bytes.
|
||||
virtual size_t getMaxLDSSize() const = 0;
|
||||
|
||||
// Returns the max GDS size that the hardware supports if the GDS is
|
||||
// supported by the hardware. Size is in bytes.
|
||||
virtual size_t getMaxGDSSize() const;
|
||||
|
||||
// Returns the max number of hardware constant address spaces that
|
||||
// are supported by this device.
|
||||
virtual size_t getMaxNumCBs() const;
|
||||
|
||||
// Returns the max number of bytes a single hardware constant buffer
|
||||
// can support. Size is in bytes.
|
||||
virtual size_t getMaxCBSize() const;
|
||||
|
||||
// Returns the max number of bytes allowed by the hardware scratch
|
||||
// buffer. Size is in bytes.
|
||||
virtual size_t getMaxScratchSize() const;
|
||||
|
||||
// Get the flag that corresponds to the device.
|
||||
virtual uint32_t getDeviceFlag() const;
|
||||
|
||||
// Returns the number of work-items that exist in a single hardware
|
||||
// wavefront.
|
||||
virtual size_t getWavefrontSize() const = 0;
|
||||
|
||||
// Get the generational name of this specific device.
|
||||
virtual uint32_t getGeneration() const = 0;
|
||||
|
||||
// Get the stack alignment of this specific device.
|
||||
virtual uint32_t getStackAlignment() const;
|
||||
|
||||
// Get the resource ID for this specific device.
|
||||
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
|
||||
|
||||
// Get the max number of UAV's for this device.
|
||||
virtual uint32_t getMaxNumUAVs() const = 0;
|
||||
|
||||
|
||||
// API utilizing more detailed capabilities of each family of
|
||||
// cards. If a capability is supported, then either usesHardware or
|
||||
// usesSoftware returned true. If usesHardware returned true, then
|
||||
// usesSoftware must return false for the same capability. Hardware
|
||||
// execution means that the feature is done natively by the hardware
|
||||
// and is not emulated by the softare. Software execution means
|
||||
// that the feature could be done in the hardware, but there is
|
||||
// software that emulates it with possibly using the hardware for
|
||||
// support since the hardware does not fully comply with OpenCL
|
||||
// specs.
|
||||
bool isSupported(AMDILDeviceInfo::Caps Mode) const;
|
||||
bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
|
||||
bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
|
||||
virtual std::string getDataLayout() const;
|
||||
static const unsigned int MAX_LDS_SIZE_700 = 16384;
|
||||
static const unsigned int MAX_LDS_SIZE_800 = 32768;
|
||||
static const unsigned int WavefrontSize = 64;
|
||||
static const unsigned int HalfWavefrontSize = 32;
|
||||
static const unsigned int QuarterWavefrontSize = 16;
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
llvm::BitVector mHWBits;
|
||||
llvm::BitVector mSWBits;
|
||||
AMDILSubtarget *mSTM;
|
||||
uint32_t mDeviceFlag;
|
||||
private:
|
||||
AMDILDeviceInfo::ExecutionMode
|
||||
getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
|
||||
}; // AMDILDevice
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILDEVICEIMPL_H_
|
|
@ -1,93 +0,0 @@
|
|||
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Function that creates DeviceInfo from a device name and other information.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
namespace llvm {
|
||||
namespace AMDILDeviceInfo {
|
||||
AMDILDevice*
|
||||
getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
|
||||
{
|
||||
if (deviceName.c_str()[2] == '7') {
|
||||
switch (deviceName.c_str()[3]) {
|
||||
case '1':
|
||||
return new AMDIL710Device(ptr);
|
||||
case '7':
|
||||
return new AMDIL770Device(ptr);
|
||||
default:
|
||||
return new AMDIL7XXDevice(ptr);
|
||||
};
|
||||
} else if (deviceName == "cypress") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILCypressDevice(ptr);
|
||||
} else if (deviceName == "juniper") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILEvergreenDevice(ptr);
|
||||
} else if (deviceName == "redwood") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILRedwoodDevice(ptr);
|
||||
} else if (deviceName == "cedar") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILCedarDevice(ptr);
|
||||
} else if (deviceName == "barts"
|
||||
|| deviceName == "turks") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILNIDevice(ptr);
|
||||
} else if (deviceName == "cayman") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILCaymanDevice(ptr);
|
||||
} else if (deviceName == "caicos") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDILNIDevice(ptr);
|
||||
} else if (deviceName == "SI") {
|
||||
return new AMDILSIDevice(ptr);
|
||||
} else {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDIL7XXDevice(ptr);
|
||||
}
|
||||
}
|
||||
} // End namespace AMDILDeviceInfo
|
||||
} // End namespace llvm
|
|
@ -1,89 +0,0 @@
|
|||
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILDEVICEINFO_H_
|
||||
#define _AMDILDEVICEINFO_H_
|
||||
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
class AMDILDevice;
|
||||
class AMDILSubtarget;
|
||||
namespace AMDILDeviceInfo
|
||||
{
|
||||
// Each Capabilities can be executed using a hardware instruction,
|
||||
// emulated with a sequence of software instructions, or not
|
||||
// supported at all.
|
||||
enum ExecutionMode {
|
||||
Unsupported = 0, // Unsupported feature on the card(Default value)
|
||||
Software, // This is the execution mode that is set if the
|
||||
// feature is emulated in software
|
||||
Hardware // This execution mode is set if the feature exists
|
||||
// natively in hardware
|
||||
};
|
||||
|
||||
// Any changes to this needs to have a corresponding update to the
|
||||
// twiki page GPUMetadataABI
|
||||
enum Caps {
|
||||
HalfOps = 0x1, // Half float is supported or not.
|
||||
DoubleOps = 0x2, // Double is supported or not.
|
||||
ByteOps = 0x3, // Byte(char) is support or not.
|
||||
ShortOps = 0x4, // Short is supported or not.
|
||||
LongOps = 0x5, // Long is supported or not.
|
||||
Images = 0x6, // Images are supported or not.
|
||||
ByteStores = 0x7, // ByteStores available(!HD4XXX).
|
||||
ConstantMem = 0x8, // Constant/CB memory.
|
||||
LocalMem = 0x9, // Local/LDS memory.
|
||||
PrivateMem = 0xA, // Scratch/Private/Stack memory.
|
||||
RegionMem = 0xB, // OCL GDS Memory Extension.
|
||||
FMA = 0xC, // Use HW FMA or SW FMA.
|
||||
ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
|
||||
MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
|
||||
Reserved0 = 0xF, // ReservedFlag
|
||||
NoAlias = 0x10, // Cached loads.
|
||||
Signed24BitOps = 0x11, // Peephole Optimization.
|
||||
// Debug mode implies that no hardware features or optimizations
|
||||
// are performned and that all memory access go through a single
|
||||
// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
|
||||
Debug = 0x12, // Debug mode is enabled.
|
||||
CachedMem = 0x13, // Cached mem is available or not.
|
||||
BarrierDetect = 0x14, // Detect duplicate barriers.
|
||||
Reserved1 = 0x15, // Reserved flag
|
||||
ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
|
||||
ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
|
||||
TmrReg = 0x18, // Flag to specify if Tmr register is supported.
|
||||
NoInline = 0x19, // Flag to specify that no inlining should occur.
|
||||
MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
|
||||
HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
|
||||
ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
|
||||
PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
|
||||
// If more capabilities are required, then
|
||||
// this number needs to be increased.
|
||||
// All capabilities must come before this
|
||||
// number.
|
||||
MaxNumberCapabilities = 0x20
|
||||
};
|
||||
// These have to be in order with the older generations
|
||||
// having the lower number enumerations.
|
||||
enum Generation {
|
||||
HD4XXX = 0, // 7XX based devices.
|
||||
HD5XXX, // Evergreen based devices.
|
||||
HD6XXX, // NI/Evergreen+ based devices.
|
||||
HD7XXX,
|
||||
HDTEST, // Experimental feature testing device.
|
||||
HDNUMGEN
|
||||
};
|
||||
|
||||
|
||||
AMDILDevice*
|
||||
getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
|
||||
} // namespace AMDILDeviceInfo
|
||||
} // namespace llvm
|
||||
#endif // _AMDILDEVICEINFO_H_
|
|
@ -1,19 +0,0 @@
|
|||
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#ifndef __AMDIL_DEVICES_H_
|
||||
#define __AMDIL_DEVICES_H_
|
||||
// Include all of the device specific header files
|
||||
// This file is for Internal use only!
|
||||
#include "AMDIL7XXDevice.h"
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDILSIDevice.h"
|
||||
|
||||
#endif // _AMDIL_DEVICES_H_
|
|
@ -1,522 +0,0 @@
|
|||
//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
// ILEnumreatedTypes.td - The IL Enumerated Types
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
// Section 5.1 IL Shader
|
||||
class ILShader<bits<8> val> {
|
||||
bits<8> Value = val;
|
||||
}
|
||||
// Table 5-1
|
||||
def IL_SHADER_PIXEL : ILShader<0>;
|
||||
def IL_SHADER_COMPUTE : ILShader<1>;
|
||||
|
||||
// Section 5.2 IL RegType
|
||||
class ILRegType<bits<6> val> {
|
||||
bits<6> Value = val;
|
||||
}
|
||||
// Table 5-2
|
||||
def IL_REGTYPE_TEMP : ILRegType<0>;
|
||||
def IL_REGTYPE_WINCOORD : ILRegType<1>;
|
||||
def IL_REGTYPE_CONST_BUF : ILRegType<2>;
|
||||
def IL_REGTYPE_LITERAL : ILRegType<3>;
|
||||
def IL_REGTYPE_ITEMP : ILRegType<4>;
|
||||
def IL_REGTYPE_GLOBAL : ILRegType<5>;
|
||||
|
||||
// Section 5.3 IL Component Select
|
||||
class ILComponentSelect<bits<3> val, string text> {
|
||||
bits<3> Value = val;
|
||||
string Text = text;
|
||||
}
|
||||
// Table 5-3
|
||||
def IL_COMPSEL_X : ILComponentSelect<0, "x">;
|
||||
def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
|
||||
def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
|
||||
def IL_COMPSEL_W : ILComponentSelect<3, "w">;
|
||||
def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
|
||||
def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
|
||||
|
||||
// Section 5.4 IL Mod Dst Comp
|
||||
class ILModDstComp<bits<2> val, string text> {
|
||||
bits<2> Value = val;
|
||||
string Text = text;
|
||||
}
|
||||
// Table 5-4
|
||||
def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
|
||||
def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
|
||||
def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
|
||||
def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
|
||||
def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
|
||||
def IL_MODCOMP_0 : ILModDstComp<2, "0">;
|
||||
def IL_MODCOMP_1 : ILModDstComp<3, "1">;
|
||||
|
||||
// Section 5.5 IL Import Usage
|
||||
class ILImportUsage<bits<1> val, string usage> {
|
||||
bits<1> Value = val;
|
||||
string Text = usage;
|
||||
}
|
||||
// Table 5-5
|
||||
def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
|
||||
|
||||
// Section 5.6 Il Shift Scale
|
||||
class ILShiftScale<bits<4> val, string scale> {
|
||||
bits<4> Value = val;
|
||||
string Text = scale;
|
||||
}
|
||||
|
||||
// Table 5-6
|
||||
def IL_SHIFT_NONE : ILShiftScale<0, "">;
|
||||
def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
|
||||
def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
|
||||
def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
|
||||
def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
|
||||
def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
|
||||
def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
|
||||
|
||||
// Section 5.7 IL Divide Component
|
||||
class ILDivComp<bits<3> val, string divcomp> {
|
||||
bits<3> Value = val;
|
||||
string Text = divcomp;
|
||||
}
|
||||
|
||||
// Table 5-7
|
||||
def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
|
||||
def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
|
||||
def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
|
||||
def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
|
||||
//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
|
||||
|
||||
// Section 5.8 IL Relational Op
|
||||
class ILRelOp<bits<3> val, string op> {
|
||||
bits<3> Value = val;
|
||||
string Text = op;
|
||||
}
|
||||
|
||||
// Table 5-8
|
||||
def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
|
||||
def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
|
||||
def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
|
||||
def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
|
||||
def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
|
||||
def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
|
||||
|
||||
// Section 5.9 IL Zero Op
|
||||
class ILZeroOp<bits<3> val, string behavior> {
|
||||
bits<3> Value = val;
|
||||
string Text = behavior;
|
||||
}
|
||||
|
||||
// Table 5-9
|
||||
def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
|
||||
def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
|
||||
def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
|
||||
def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
|
||||
|
||||
// Section 5.10 IL Cmp Value
|
||||
class ILCmpValue<bits<3> val, string num> {
|
||||
bits<3> Value = val;
|
||||
string Text = num;
|
||||
}
|
||||
|
||||
// Table 5-10
|
||||
def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
|
||||
def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
|
||||
def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
|
||||
def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
|
||||
def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
|
||||
|
||||
// Section 5.11 IL Addressing
|
||||
class ILAddressing<bits<3> val> {
|
||||
bits<3> Value = val;
|
||||
}
|
||||
|
||||
// Table 5-11
|
||||
def IL_ADDR_ABSOLUTE : ILAddressing<0>;
|
||||
def IL_ADDR_RELATIVE : ILAddressing<1>;
|
||||
def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
|
||||
|
||||
// Section 5.11 IL Element Format
|
||||
class ILElementFormat<bits<5> val> {
|
||||
bits<5> Value = val;
|
||||
}
|
||||
|
||||
// Table 5-11
|
||||
def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
|
||||
def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
|
||||
def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
|
||||
def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
|
||||
def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
|
||||
def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
|
||||
def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
|
||||
def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
|
||||
def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
|
||||
|
||||
// Section 5.12 IL Op Code
|
||||
class ILOpCode<bits<16> val = -1, string cmd> {
|
||||
bits<16> Value = val;
|
||||
string Text = cmd;
|
||||
}
|
||||
|
||||
// Table 5-12
|
||||
def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
|
||||
def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
|
||||
def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
|
||||
def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
|
||||
def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
|
||||
def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
|
||||
def IL_OP_ABS : ILOpCode<6, "abs">;
|
||||
def IL_OP_ADD : ILOpCode<7, "add">;
|
||||
def IL_OP_AND : ILOpCode<8, "iand">;
|
||||
def IL_OP_BREAK : ILOpCode<9, "break">;
|
||||
def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
|
||||
def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
|
||||
def IL_OP_BREAKC : ILOpCode<12, "breakc">;
|
||||
def IL_OP_CALL : ILOpCode<13, "call">;
|
||||
def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
|
||||
def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
|
||||
def IL_OP_CASE : ILOpCode<16, "case">;
|
||||
def IL_OP_CLG : ILOpCode<17, "clg">;
|
||||
def IL_OP_CMOV : ILOpCode<18, "cmov">;
|
||||
def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
|
||||
def IL_OP_CMP : ILOpCode<20, "cmp">;
|
||||
def IL_OP_CONTINUE : ILOpCode<21, "continue">;
|
||||
def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
|
||||
def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
|
||||
def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
|
||||
def IL_OP_COS : ILOpCode<25, "cos">;
|
||||
def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
|
||||
def IL_OP_D_2_F : ILOpCode<27, "d2f">;
|
||||
def IL_OP_D_ADD : ILOpCode<28, "dadd">;
|
||||
def IL_OP_D_EQ : ILOpCode<29, "deq">;
|
||||
def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
|
||||
def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
|
||||
def IL_OP_D_GE : ILOpCode<32, "dge">;
|
||||
def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
|
||||
def IL_OP_D_LT : ILOpCode<34, "dlt">;
|
||||
def IL_OP_D_MAD : ILOpCode<35, "dmad">;
|
||||
def IL_OP_D_MUL : ILOpCode<36, "dmul">;
|
||||
def IL_OP_D_NE : ILOpCode<37, "dne">;
|
||||
def IL_OP_DEFAULT : ILOpCode<38, "default">;
|
||||
def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
|
||||
def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
|
||||
def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
|
||||
def IL_OP_DP2 : ILOpCode<42, "dp2">;
|
||||
def IL_OP_DP3 : ILOpCode<43, "dp3">;
|
||||
def IL_OP_DP4 : ILOpCode<44, "dp4">;
|
||||
def IL_OP_ELSE : ILOpCode<45, "else">;
|
||||
def IL_OP_END : ILOpCode<46, "end">;
|
||||
def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
|
||||
def IL_OP_ENDIF : ILOpCode<48, "endif">;
|
||||
def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
|
||||
def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
|
||||
def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
|
||||
def IL_OP_EQ : ILOpCode<52, "eq">;
|
||||
def IL_OP_EXP : ILOpCode<53, "exp">;
|
||||
def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
|
||||
def IL_OP_F_2_D : ILOpCode<55, "f2d">;
|
||||
def IL_OP_FLR : ILOpCode<56, "flr">;
|
||||
def IL_OP_FRC : ILOpCode<57, "frc">;
|
||||
def IL_OP_FTOI : ILOpCode<58, "ftoi">;
|
||||
def IL_OP_FTOU : ILOpCode<59, "ftou">;
|
||||
def IL_OP_FUNC : ILOpCode<60, "func">;
|
||||
def IL_OP_GE : ILOpCode<61, "ge">;
|
||||
def IL_OP_I_ADD : ILOpCode<62, "iadd">;
|
||||
def IL_OP_I_EQ : ILOpCode<63, "ieq">;
|
||||
def IL_OP_I_GE : ILOpCode<64, "ige">;
|
||||
def IL_OP_I_LT : ILOpCode<65, "ilt">;
|
||||
def IL_OP_I_MAD : ILOpCode<66, "imad">;
|
||||
def IL_OP_I_MAX : ILOpCode<67, "imax">;
|
||||
def IL_OP_I_MIN : ILOpCode<68, "imin">;
|
||||
def IL_OP_I_MUL : ILOpCode<69, "imul">;
|
||||
def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
|
||||
def IL_OP_I_NE : ILOpCode<71, "ine">;
|
||||
def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
|
||||
def IL_OP_I_NOT : ILOpCode<73, "inot">;
|
||||
def IL_OP_I_OR : ILOpCode<74, "ior">;
|
||||
def IL_OP_I_SHL : ILOpCode<75, "ishl">;
|
||||
def IL_OP_I_SHR : ILOpCode<76, "ishr">;
|
||||
def IL_OP_I_XOR : ILOpCode<77, "ixor">;
|
||||
def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
|
||||
def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
|
||||
def IL_OP_IFC : ILOpCode<80, "ifc">;
|
||||
def IL_OP_ITOF : ILOpCode<81, "itof">;
|
||||
def IL_OP_LN : ILOpCode<82, "ln">;
|
||||
def IL_OP_LOG : ILOpCode<83, "log">;
|
||||
def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
|
||||
def IL_OP_LOOP : ILOpCode<85, "loop">;
|
||||
def IL_OP_LT : ILOpCode<86, "lt">;
|
||||
def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
|
||||
def IL_OP_MAX : ILOpCode<88, "max_ieee">;
|
||||
def IL_OP_MIN : ILOpCode<89, "min_ieee">;
|
||||
def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
|
||||
def IL_OP_MOV : ILOpCode<91, "mov">;
|
||||
def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
|
||||
def IL_OP_NE : ILOpCode<93, "ne">;
|
||||
def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
|
||||
def IL_OP_POW : ILOpCode<95, "pow">;
|
||||
def IL_OP_RCP : ILOpCode<96, "rcp">;
|
||||
def IL_OP_RET : ILOpCode<97, "ret">;
|
||||
def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
|
||||
def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
|
||||
def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
|
||||
def IL_OP_RND : ILOpCode<101, "rnd">;
|
||||
def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
|
||||
def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
|
||||
def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
|
||||
def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
|
||||
def IL_OP_RSQ : ILOpCode<106, "rsq">;
|
||||
def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
|
||||
def IL_OP_SAMPLE : ILOpCode<108, "sample">;
|
||||
def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
|
||||
def IL_OP_SET : ILOpCode<110, "set">;
|
||||
def IL_OP_SGN : ILOpCode<111, "sgn">;
|
||||
def IL_OP_SIN : ILOpCode<112, "sin">;
|
||||
def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
|
||||
def IL_OP_SUB : ILOpCode<114, "sub">;
|
||||
def IL_OP_SWITCH : ILOpCode<115, "switch">;
|
||||
def IL_OP_TRC : ILOpCode<116, "trc">;
|
||||
def IL_OP_U_DIV : ILOpCode<117, "udiv">;
|
||||
def IL_OP_U_GE : ILOpCode<118, "uge">;
|
||||
def IL_OP_U_LT : ILOpCode<119, "ult">;
|
||||
def IL_OP_U_MAD : ILOpCode<120, "umad">;
|
||||
def IL_OP_U_MAX : ILOpCode<121, "umax">;
|
||||
def IL_OP_U_MIN : ILOpCode<122, "umin">;
|
||||
def IL_OP_U_MOD : ILOpCode<123, "umod">;
|
||||
def IL_OP_U_MUL : ILOpCode<124, "umul">;
|
||||
def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
|
||||
def IL_OP_U_SHR : ILOpCode<126, "ushr">;
|
||||
def IL_OP_UTOF : ILOpCode<127, "utof">;
|
||||
def IL_OP_WHILE : ILOpCode<128, "whileloop">;
|
||||
// SC IL instructions that are not in CAL IL
|
||||
def IL_OP_ACOS : ILOpCode<129, "acos">;
|
||||
def IL_OP_ASIN : ILOpCode<130, "asin">;
|
||||
def IL_OP_EXN : ILOpCode<131, "exn">;
|
||||
def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
|
||||
def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
|
||||
def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
|
||||
def IL_OP_SQRT : ILOpCode<135, "sqrt">;
|
||||
def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
|
||||
def IL_OP_ATAN : ILOpCode<137, "atan">;
|
||||
def IL_OP_TAN : ILOpCode<137, "tan">;
|
||||
def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
|
||||
def IL_OP_F_NEG : ILOpCode<139, "mov">;
|
||||
def IL_OP_GT : ILOpCode<140, "gt">;
|
||||
def IL_OP_LE : ILOpCode<141, "lt">;
|
||||
def IL_OP_DIST : ILOpCode<142, "dist">;
|
||||
def IL_OP_LEN : ILOpCode<143, "len">;
|
||||
def IL_OP_MACRO : ILOpCode<144, "mcall">;
|
||||
def IL_OP_INTR : ILOpCode<145, "call">;
|
||||
def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
|
||||
def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
|
||||
def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
|
||||
def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
|
||||
def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
|
||||
def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
|
||||
def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
|
||||
def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
|
||||
def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
|
||||
def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
|
||||
def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
|
||||
def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
|
||||
def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
|
||||
def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
|
||||
def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
|
||||
def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
|
||||
def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
|
||||
def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
|
||||
def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
|
||||
def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
|
||||
def IL_OP_SAD : ILOpCode<166, "sad">;
|
||||
def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
|
||||
def IL_OP_SAD4 : ILOpCode<168, "sad4">;
|
||||
def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
|
||||
def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
|
||||
def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
|
||||
def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
|
||||
def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
|
||||
def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
|
||||
def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
|
||||
def IL_OP_CLAMP : ILOpCode<176, "clamp">;
|
||||
def IL_OP_LERP : ILOpCode<177, "lrp">;
|
||||
def IL_OP_FMA : ILOpCode<178, "fma">;
|
||||
def IL_OP_D_MIN : ILOpCode<179, "dmin">;
|
||||
def IL_OP_D_MAX : ILOpCode<180, "dmax">;
|
||||
def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
|
||||
def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
|
||||
def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
|
||||
def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
|
||||
def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
|
||||
def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
|
||||
def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlag">;
|
||||
def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
|
||||
def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
|
||||
def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
|
||||
def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
|
||||
def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
|
||||
def IL_OP_D_MOV : ILOpCode<193, "dmov">;
|
||||
def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
|
||||
def IL_OP_NOP : ILOpCode<195, "nop">;
|
||||
def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
|
||||
def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
|
||||
def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
|
||||
def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
|
||||
def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
|
||||
def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
|
||||
def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
|
||||
def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
|
||||
def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
|
||||
def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
|
||||
def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
|
||||
def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
|
||||
def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
|
||||
def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
|
||||
def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
|
||||
def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
|
||||
def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
|
||||
def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
|
||||
def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
|
||||
def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
|
||||
def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
|
||||
def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
|
||||
def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
|
||||
def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
|
||||
def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
|
||||
def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
|
||||
def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
|
||||
def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
|
||||
def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
|
||||
def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
|
||||
def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
|
||||
def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
|
||||
def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
|
||||
def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
|
||||
def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
|
||||
def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
|
||||
def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
|
||||
def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
|
||||
def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
|
||||
def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
|
||||
def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
|
||||
def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
|
||||
def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
|
||||
def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
|
||||
def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
|
||||
def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
|
||||
def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
|
||||
def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
|
||||
def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
|
||||
def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
|
||||
def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
|
||||
def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
|
||||
def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
|
||||
def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
|
||||
def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
|
||||
def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
|
||||
def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
|
||||
def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
|
||||
def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
|
||||
def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
|
||||
def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
|
||||
def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
|
||||
def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
|
||||
def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
|
||||
def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
|
||||
def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
|
||||
def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
|
||||
def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
|
||||
def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
|
||||
def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
|
||||
def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
|
||||
def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
|
||||
def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
|
||||
def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
|
||||
def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
|
||||
def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
|
||||
def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
|
||||
def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
|
||||
def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
|
||||
def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
|
||||
def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
|
||||
def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
|
||||
def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
|
||||
def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
|
||||
def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
|
||||
def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
|
||||
def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
|
||||
def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
|
||||
def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
|
||||
def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
|
||||
def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
|
||||
def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
|
||||
def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
|
||||
def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
|
||||
def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
|
||||
def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
|
||||
def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
|
||||
def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
|
||||
def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
|
||||
def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
|
||||
def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
|
||||
def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
|
||||
def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
|
||||
def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
|
||||
def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
|
||||
def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
|
||||
def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
|
||||
def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
|
||||
def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
|
||||
def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
|
||||
def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
|
||||
def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
|
||||
def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
|
||||
def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
|
||||
def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
|
||||
def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
|
||||
def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
|
||||
def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
|
||||
def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
|
||||
def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
|
||||
def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
|
||||
def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
|
||||
def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
|
||||
def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
|
||||
def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
|
||||
def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">;
|
||||
def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
|
||||
def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
|
||||
def IL_OP_MUL : ILOpCode<323, "mul">;
|
||||
def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
|
||||
def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
|
||||
def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
|
||||
def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
|
||||
def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
|
||||
def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
|
||||
def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
|
||||
def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
|
||||
def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
|
||||
def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
|
||||
def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
|
||||
def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
|
||||
def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
|
||||
def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
|
||||
def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
|
||||
def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
|
||||
def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
|
||||
def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
|
||||
def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
|
||||
def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
|
||||
def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
|
||||
def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
|
||||
def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
|
||||
def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">;
|
||||
def IL_OP_BFI : ILOpCode<394, "bfi">;
|
||||
def IL_OP_BFM : ILOpCode<395, "bfm">;
|
||||
def IL_DBG_STRING : ILOpCode<396, "dbg_string">;
|
||||
def IL_DBG_LINE : ILOpCode<397, "dbg_line">;
|
||||
def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">;
|
|
@ -1,183 +0,0 @@
|
|||
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
|
||||
: AMDILDevice(ST) {
|
||||
setCaps();
|
||||
std::string name = ST->getDeviceName();
|
||||
if (name == "cedar") {
|
||||
mDeviceFlag = OCL_DEVICE_CEDAR;
|
||||
} else if (name == "redwood") {
|
||||
mDeviceFlag = OCL_DEVICE_REDWOOD;
|
||||
} else if (name == "cypress") {
|
||||
mDeviceFlag = OCL_DEVICE_CYPRESS;
|
||||
} else {
|
||||
mDeviceFlag = OCL_DEVICE_JUNIPER;
|
||||
}
|
||||
}
|
||||
|
||||
AMDILEvergreenDevice::~AMDILEvergreenDevice() {
|
||||
}
|
||||
|
||||
size_t AMDILEvergreenDevice::getMaxLDSSize() const {
|
||||
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_800;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
size_t AMDILEvergreenDevice::getMaxGDSSize() const {
|
||||
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
|
||||
return MAX_LDS_SIZE_800;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const {
|
||||
return 12;
|
||||
}
|
||||
|
||||
uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
|
||||
switch(id) {
|
||||
default:
|
||||
assert(0 && "ID type passed in is unknown!");
|
||||
break;
|
||||
case CONSTANT_ID:
|
||||
case RAW_UAV_ID:
|
||||
if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
|
||||
return GLOBAL_RETURN_RAW_UAV_ID;
|
||||
} else {
|
||||
return DEFAULT_RAW_UAV_ID;
|
||||
}
|
||||
case GLOBAL_ID:
|
||||
case ARENA_UAV_ID:
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
case LDS_ID:
|
||||
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||
return DEFAULT_LDS_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
case GDS_ID:
|
||||
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
|
||||
return DEFAULT_GDS_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
case SCRATCH_ID:
|
||||
if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
|
||||
return DEFAULT_SCRATCH_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDILEvergreenDevice::getWavefrontSize() const {
|
||||
return AMDILDevice::WavefrontSize;
|
||||
}
|
||||
|
||||
uint32_t AMDILEvergreenDevice::getGeneration() const {
|
||||
return AMDILDeviceInfo::HD5XXX;
|
||||
}
|
||||
|
||||
void AMDILEvergreenDevice::setCaps() {
|
||||
mSWBits.set(AMDILDeviceInfo::ArenaSegment);
|
||||
mHWBits.set(AMDILDeviceInfo::ArenaUAV);
|
||||
if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
|
||||
mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
|
||||
mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
|
||||
}
|
||||
mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
|
||||
mHWBits.set(AMDILDeviceInfo::ByteStores);
|
||||
}
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDILDeviceInfo::LocalMem);
|
||||
mSWBits.set(AMDILDeviceInfo::RegionMem);
|
||||
} else {
|
||||
mHWBits.set(AMDILDeviceInfo::LocalMem);
|
||||
mHWBits.set(AMDILDeviceInfo::RegionMem);
|
||||
}
|
||||
mHWBits.set(AMDILDeviceInfo::Images);
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
|
||||
mHWBits.set(AMDILDeviceInfo::NoAlias);
|
||||
}
|
||||
if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
|
||||
mHWBits.set(AMDILDeviceInfo::CachedMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
|
||||
mHWBits.set(AMDILDeviceInfo::MultiUAV);
|
||||
}
|
||||
if (mSTM->calVersion() > CAL_VERSION_SC_136) {
|
||||
mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
|
||||
mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
|
||||
mHWBits.set(AMDILDeviceInfo::ArenaVectors);
|
||||
} else {
|
||||
mSWBits.set(AMDILDeviceInfo::ArenaVectors);
|
||||
}
|
||||
if (mSTM->calVersion() > CAL_VERSION_SC_137) {
|
||||
mHWBits.set(AMDILDeviceInfo::LongOps);
|
||||
mSWBits.reset(AMDILDeviceInfo::LongOps);
|
||||
}
|
||||
mHWBits.set(AMDILDeviceInfo::TmrReg);
|
||||
}
|
||||
|
||||
AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
|
||||
: AMDILEvergreenDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDILCypressDevice::~AMDILCypressDevice() {
|
||||
}
|
||||
|
||||
void AMDILCypressDevice::setCaps() {
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
|
||||
mHWBits.set(AMDILDeviceInfo::DoubleOps);
|
||||
mHWBits.set(AMDILDeviceInfo::FMA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
|
||||
: AMDILEvergreenDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDILCedarDevice::~AMDILCedarDevice() {
|
||||
}
|
||||
|
||||
void AMDILCedarDevice::setCaps() {
|
||||
mSWBits.set(AMDILDeviceInfo::FMA);
|
||||
}
|
||||
|
||||
size_t AMDILCedarDevice::getWavefrontSize() const {
|
||||
return AMDILDevice::QuarterWavefrontSize;
|
||||
}
|
||||
|
||||
AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
|
||||
: AMDILEvergreenDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDILRedwoodDevice::~AMDILRedwoodDevice()
|
||||
{
|
||||
}
|
||||
|
||||
void AMDILRedwoodDevice::setCaps() {
|
||||
mSWBits.set(AMDILDeviceInfo::FMA);
|
||||
}
|
||||
|
||||
size_t AMDILRedwoodDevice::getWavefrontSize() const {
|
||||
return AMDILDevice::HalfWavefrontSize;
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILEVERGREENDEVICE_H_
|
||||
#define _AMDILEVERGREENDEVICE_H_
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDILSubtarget;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Evergreen generation of devices and their respective sub classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// The AMDILEvergreenDevice is the base device class for all of the Evergreen
|
||||
// series of cards. This class contains information required to differentiate
|
||||
// the Evergreen device from the generic AMDILDevice. This device represents
|
||||
// that capabilities of the 'Juniper' cards, also known as the HD57XX.
|
||||
class AMDILEvergreenDevice : public AMDILDevice {
|
||||
public:
|
||||
AMDILEvergreenDevice(AMDILSubtarget *ST);
|
||||
virtual ~AMDILEvergreenDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual size_t getMaxGDSSize() const;
|
||||
virtual size_t getWavefrontSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual uint32_t getMaxNumUAVs() const;
|
||||
virtual uint32_t getResourceID(uint32_t) const;
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
}; // AMDILEvergreenDevice
|
||||
|
||||
// The AMDILCypressDevice is similiar to the AMDILEvergreenDevice, except it has
|
||||
// support for double precision operations. This device is used to represent
|
||||
// both the Cypress and Hemlock cards, which are commercially known as HD58XX
|
||||
// and HD59XX cards.
|
||||
class AMDILCypressDevice : public AMDILEvergreenDevice {
|
||||
public:
|
||||
AMDILCypressDevice(AMDILSubtarget *ST);
|
||||
virtual ~AMDILCypressDevice();
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDILCypressDevice
|
||||
|
||||
|
||||
// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
|
||||
// devices. This class differs from the base AMDILEvergreenDevice in that the
|
||||
// device is a ~quarter of the 'Juniper'. These are commercially known as the
|
||||
// HD54XX and HD53XX series of cards.
|
||||
class AMDILCedarDevice : public AMDILEvergreenDevice {
|
||||
public:
|
||||
AMDILCedarDevice(AMDILSubtarget *ST);
|
||||
virtual ~AMDILCedarDevice();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDILCedarDevice
|
||||
|
||||
// The AMDILRedwoodDevice is the class the represents all of the 'Redwood' based
|
||||
// devices. This class differs from the base class, in that these devices are
|
||||
// considered about half of a 'Juniper' device. These are commercially known as
|
||||
// the HD55XX and HD56XX series of cards.
|
||||
class AMDILRedwoodDevice : public AMDILEvergreenDevice {
|
||||
public:
|
||||
AMDILRedwoodDevice(AMDILSubtarget *ST);
|
||||
virtual ~AMDILRedwoodDevice();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDILRedwoodDevice
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILEVERGREENDEVICE_H_
|
|
@ -1,175 +0,0 @@
|
|||
//==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
//===--------------------------------------------------------------------===//
|
||||
include "AMDILTokenDesc.td"
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// The parent IL instruction class that inherits the Instruction class. This
|
||||
// class sets the corresponding namespace, the out and input dag lists the
|
||||
// pattern to match to and the string to print out for the assembly printer.
|
||||
//===--------------------------------------------------------------------===//
|
||||
class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
|
||||
: Instruction {
|
||||
|
||||
let Namespace = "AMDGPU";
|
||||
dag OutOperandList = outs;
|
||||
dag InOperandList = ins;
|
||||
ILOpCode operation = op;
|
||||
let Pattern = pattern;
|
||||
let AsmString = !strconcat(asmstr, "\n");
|
||||
let isPseudo = 1;
|
||||
let Itinerary = NullALU;
|
||||
bit hasIEEEFlag = 0;
|
||||
bit hasZeroOpFlag = 0;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Class that has one input parameters and one output parameter.
|
||||
// The basic pattern for this class is "Opcode Dst, Src0" and
|
||||
// handles the unary math operators.
|
||||
// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
|
||||
// if the addressing is register relative for input and output register 0.
|
||||
//===--------------------------------------------------------------------===//
|
||||
class OneInOneOut<ILOpCode op, dag outs, dag ins,
|
||||
string asmstr, list<dag> pattern>
|
||||
: ILFormat<op, outs, ins, asmstr, pattern>
|
||||
{
|
||||
ILDst dst_reg;
|
||||
ILDstMod dst_mod;
|
||||
ILRelAddr dst_rel;
|
||||
ILSrc dst_reg_rel;
|
||||
ILSrcMod dst_reg_rel_mod;
|
||||
ILSrc src0_reg;
|
||||
ILSrcMod src0_mod;
|
||||
ILRelAddr src0_rel;
|
||||
ILSrc src0_reg_rel;
|
||||
ILSrcMod src0_reg_rel_mod;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// This class is similiar to the UnaryOp class, however, there is no
|
||||
// result value to assign.
|
||||
//===--------------------------------------------------------------------===//
|
||||
class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
|
||||
string asmstr, list<dag> pattern>
|
||||
: ILFormat<op, outs, ins, asmstr, pattern>
|
||||
{
|
||||
ILSrc src0_reg;
|
||||
ILSrcMod src0_mod;
|
||||
ILRelAddr src0_rel;
|
||||
ILSrc src0_reg_rel;
|
||||
ILSrcMod src0_reg_rel_mod;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Set of classes that have two input parameters and one output parameter.
|
||||
// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
|
||||
// handles the binary math operators and comparison operations.
|
||||
// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
|
||||
// if the addressing is register relative for input register 1.
|
||||
//===--------------------------------------------------------------------===//
|
||||
class TwoInOneOut<ILOpCode op, dag outs, dag ins,
|
||||
string asmstr, list<dag> pattern>
|
||||
: OneInOneOut<op, outs, ins, asmstr, pattern>
|
||||
{
|
||||
ILSrc src1_reg;
|
||||
ILSrcMod src1_mod;
|
||||
ILRelAddr src1_rel;
|
||||
ILSrc src1_reg_rel;
|
||||
ILSrcMod src1_reg_rel_mod;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Similiar to the UnaryOpNoRet class, but takes as arguments two input
|
||||
// operands. Used mainly for barrier instructions on PC platform.
|
||||
//===--------------------------------------------------------------------===//
|
||||
class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
|
||||
string asmstr, list<dag> pattern>
|
||||
: UnaryOpNoRet<op, outs, ins, asmstr, pattern>
|
||||
{
|
||||
ILSrc src1_reg;
|
||||
ILSrcMod src1_mod;
|
||||
ILRelAddr src1_rel;
|
||||
ILSrc src1_reg_rel;
|
||||
ILSrcMod src1_reg_rel_mod;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Set of classes that have three input parameters and one output parameter.
|
||||
// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
|
||||
// handles the mad and conditional mov instruction.
|
||||
// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
|
||||
// if the addressing is register relative.
|
||||
// This class is the parent class of TernaryOp
|
||||
//===--------------------------------------------------------------------===//
|
||||
class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
|
||||
string asmstr, list<dag> pattern>
|
||||
: TwoInOneOut<op, outs, ins, asmstr, pattern> {
|
||||
ILSrc src2_reg;
|
||||
ILSrcMod src2_mod;
|
||||
ILRelAddr src2_rel;
|
||||
ILSrc src2_reg_rel;
|
||||
ILSrcMod src2_reg_rel_mod;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Intrinsic classes
|
||||
// Generic versions of the above classes but for Target specific intrinsics
|
||||
// instead of SDNode patterns.
|
||||
//===--------------------------------------------------------------------===//
|
||||
let TargetPrefix = "AMDIL", isTarget = 1 in {
|
||||
class VoidIntLong :
|
||||
Intrinsic<[llvm_i64_ty], [], []>;
|
||||
class VoidIntInt :
|
||||
Intrinsic<[llvm_i32_ty], [], []>;
|
||||
class VoidIntBool :
|
||||
Intrinsic<[llvm_i32_ty], [], []>;
|
||||
class UnaryIntInt :
|
||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class UnaryIntFloat :
|
||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class ConvertIntFTOI :
|
||||
Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||
class ConvertIntITOF :
|
||||
Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
class UnaryIntNoRetInt :
|
||||
Intrinsic<[], [llvm_anyint_ty], []>;
|
||||
class UnaryIntNoRetFloat :
|
||||
Intrinsic<[], [llvm_anyfloat_ty], []>;
|
||||
class BinaryIntInt :
|
||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class BinaryIntFloat :
|
||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class BinaryIntNoRetInt :
|
||||
Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
|
||||
class BinaryIntNoRetFloat :
|
||||
Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
|
||||
class TernaryIntInt :
|
||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class TernaryIntFloat :
|
||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class QuaternaryIntInt :
|
||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class UnaryAtomicInt :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
||||
class BinaryAtomicInt :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
||||
class TernaryAtomicInt :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
||||
class UnaryAtomicIntNoRet :
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
||||
class BinaryAtomicIntNoRet :
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
||||
class TernaryAtomicIntNoRet :
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to describe a layout of a stack frame on a AMDIL target machine
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDILFrameLowering.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
|
||||
int LAO, unsigned TransAl)
|
||||
: TargetFrameLowering(D, StackAl, LAO, TransAl)
|
||||
{
|
||||
}
|
||||
|
||||
AMDILFrameLowering::~AMDILFrameLowering()
|
||||
{
|
||||
}
|
||||
|
||||
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
||||
/// the stack frame of the specified index.
|
||||
int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
return MFI->getObjectOffset(FI);
|
||||
}
|
||||
|
||||
const TargetFrameLowering::SpillSlot *
|
||||
AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
|
||||
{
|
||||
NumEntries = 0;
|
||||
return 0;
|
||||
}
|
||||
void
|
||||
AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
|
||||
{
|
||||
}
|
||||
void
|
||||
AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
|
||||
{
|
||||
}
|
||||
bool
|
||||
AMDILFrameLowering::hasFP(const MachineFunction &MF) const
|
||||
{
|
||||
return false;
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to describe a layout of a stack frame on a AMDIL target machine
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILFRAME_LOWERING_H_
|
||||
#define _AMDILFRAME_LOWERING_H_
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
||||
/// Information about the stack frame layout on the AMDIL targets. It holds
|
||||
/// the direction of the stack growth, the known stack alignment on entry to
|
||||
/// each function, and the offset to the locals area.
|
||||
/// See TargetFrameInfo for more comments.
|
||||
|
||||
namespace llvm {
|
||||
class AMDILFrameLowering : public TargetFrameLowering {
|
||||
public:
|
||||
AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
|
||||
TransAl = 1);
|
||||
virtual ~AMDILFrameLowering();
|
||||
virtual int getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const;
|
||||
virtual const SpillSlot *
|
||||
getCalleeSavedSpillSlots(unsigned &NumEntries) const;
|
||||
virtual void emitPrologue(MachineFunction &MF) const;
|
||||
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
virtual bool hasFP(const MachineFunction &MF) const;
|
||||
}; // class AMDILFrameLowering
|
||||
} // namespace llvm
|
||||
#endif // _AMDILFRAME_LOWERING_H_
|
|
@ -1,393 +0,0 @@
|
|||
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines an instruction selector for the AMDIL target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "llvm/ADT/ValueMap.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Selector Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
|
||||
// //for SelectionDAG operations.
|
||||
//
|
||||
namespace {
|
||||
class AMDILDAGToDAGISel : public SelectionDAGISel {
|
||||
// Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
|
||||
// make the right decision when generating code for different targets.
|
||||
const AMDILSubtarget &Subtarget;
|
||||
public:
|
||||
AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||
virtual ~AMDILDAGToDAGISel();
|
||||
|
||||
SDNode *Select(SDNode *N);
|
||||
virtual const char *getPassName() const;
|
||||
|
||||
private:
|
||||
inline SDValue getSmallIPtrImm(unsigned Imm);
|
||||
|
||||
// Complex pattern selectors
|
||||
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
|
||||
bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
|
||||
bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
|
||||
|
||||
static bool checkType(const Value *ptr, unsigned int addrspace);
|
||||
static const Value *getBasePointerValue(const Value *V);
|
||||
|
||||
static bool isGlobalStore(const StoreSDNode *N);
|
||||
static bool isPrivateStore(const StoreSDNode *N);
|
||||
static bool isLocalStore(const StoreSDNode *N);
|
||||
static bool isRegionStore(const StoreSDNode *N);
|
||||
|
||||
static bool isCPLoad(const LoadSDNode *N);
|
||||
static bool isConstantLoad(const LoadSDNode *N, int cbID);
|
||||
static bool isGlobalLoad(const LoadSDNode *N);
|
||||
static bool isPrivateLoad(const LoadSDNode *N);
|
||||
static bool isLocalLoad(const LoadSDNode *N);
|
||||
static bool isRegionLoad(const LoadSDNode *N);
|
||||
|
||||
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
|
||||
// Include the pieces autogenerated from the target description.
|
||||
#include "AMDGPUGenDAGISel.inc"
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific
|
||||
// DAG, ready for instruction scheduling.
|
||||
//
|
||||
FunctionPass *llvm::createAMDILISelDag(TargetMachine &TM
|
||||
AMDIL_OPT_LEVEL_DECL) {
|
||||
return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
|
||||
}
|
||||
|
||||
AMDILDAGToDAGISel::AMDILDAGToDAGISel(TargetMachine &TM
|
||||
AMDIL_OPT_LEVEL_DECL)
|
||||
: SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR), Subtarget(TM.getSubtarget<AMDILSubtarget>())
|
||||
{
|
||||
}
|
||||
|
||||
AMDILDAGToDAGISel::~AMDILDAGToDAGISel() {
|
||||
}
|
||||
|
||||
SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
|
||||
return CurDAG->getTargetConstant(Imm, MVT::i32);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::SelectADDRParam(
|
||||
SDValue Addr, SDValue& R1, SDValue& R2) {
|
||||
|
||||
if (Addr.getOpcode() == ISD::FrameIndex) {
|
||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
|
||||
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
|
||||
R2 = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
} else {
|
||||
R1 = Addr;
|
||||
R2 = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
}
|
||||
} else if (Addr.getOpcode() == ISD::ADD) {
|
||||
R1 = Addr.getOperand(0);
|
||||
R2 = Addr.getOperand(1);
|
||||
} else {
|
||||
R1 = Addr;
|
||||
R2 = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
|
||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
|
||||
return false;
|
||||
}
|
||||
return SelectADDRParam(Addr, R1, R2);
|
||||
}
|
||||
|
||||
|
||||
bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
|
||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Addr.getOpcode() == ISD::FrameIndex) {
|
||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
|
||||
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
|
||||
R2 = CurDAG->getTargetConstant(0, MVT::i64);
|
||||
} else {
|
||||
R1 = Addr;
|
||||
R2 = CurDAG->getTargetConstant(0, MVT::i64);
|
||||
}
|
||||
} else if (Addr.getOpcode() == ISD::ADD) {
|
||||
R1 = Addr.getOperand(0);
|
||||
R2 = Addr.getOperand(1);
|
||||
} else {
|
||||
R1 = Addr;
|
||||
R2 = CurDAG->getTargetConstant(0, MVT::i64);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
|
||||
unsigned int Opc = N->getOpcode();
|
||||
if (N->isMachineOpcode()) {
|
||||
return NULL; // Already selected.
|
||||
}
|
||||
switch (Opc) {
|
||||
default: break;
|
||||
case ISD::FrameIndex:
|
||||
{
|
||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
|
||||
unsigned int FI = FIN->getIndex();
|
||||
EVT OpVT = N->getValueType(0);
|
||||
unsigned int NewOpc = AMDGPU::COPY;
|
||||
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
|
||||
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
||||
if (!ptr) {
|
||||
return false;
|
||||
}
|
||||
Type *ptrType = ptr->getType();
|
||||
return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
|
||||
}
|
||||
|
||||
const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
|
||||
{
|
||||
if (!V) {
|
||||
return NULL;
|
||||
}
|
||||
const Value *ret = NULL;
|
||||
ValueMap<const Value *, bool> ValueBitMap;
|
||||
std::queue<const Value *, std::list<const Value *> > ValueQueue;
|
||||
ValueQueue.push(V);
|
||||
while (!ValueQueue.empty()) {
|
||||
V = ValueQueue.front();
|
||||
if (ValueBitMap.find(V) == ValueBitMap.end()) {
|
||||
ValueBitMap[V] = true;
|
||||
if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
|
||||
ret = V;
|
||||
break;
|
||||
} else if (dyn_cast<GlobalVariable>(V)) {
|
||||
ret = V;
|
||||
break;
|
||||
} else if (dyn_cast<Constant>(V)) {
|
||||
const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
|
||||
if (CE) {
|
||||
ValueQueue.push(CE->getOperand(0));
|
||||
}
|
||||
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
|
||||
ret = AI;
|
||||
break;
|
||||
} else if (const Instruction *I = dyn_cast<Instruction>(V)) {
|
||||
uint32_t numOps = I->getNumOperands();
|
||||
for (uint32_t x = 0; x < numOps; ++x) {
|
||||
ValueQueue.push(I->getOperand(x));
|
||||
}
|
||||
} else {
|
||||
// assert(0 && "Found a Value that we didn't know how to handle!");
|
||||
}
|
||||
}
|
||||
ValueQueue.pop();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
|
||||
return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
|
||||
if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
|
||||
return true;
|
||||
}
|
||||
MachineMemOperand *MMO = N->getMemOperand();
|
||||
const Value *V = MMO->getValue();
|
||||
const Value *BV = getBasePointerValue(V);
|
||||
if (MMO
|
||||
&& MMO->getValue()
|
||||
&& ((V && dyn_cast<GlobalValue>(V))
|
||||
|| (BV && dyn_cast<GlobalValue>(
|
||||
getBasePointerValue(MMO->getValue()))))) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
|
||||
MachineMemOperand *MMO = N->getMemOperand();
|
||||
if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
|
||||
if (MMO) {
|
||||
const Value *V = MMO->getValue();
|
||||
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
|
||||
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
|
||||
if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
|
||||
// Check to make sure we are not a constant pool load or a constant load
|
||||
// that is marked as a private load
|
||||
if (isCPLoad(N) || isConstantLoad(N, -1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *AMDILDAGToDAGISel::getPassName() const {
|
||||
return "AMDIL DAG->DAG Pattern Instruction Selection";
|
||||
}
|
||||
|
||||
#ifdef DEBUGTMP
|
||||
#undef INT64_C
|
||||
#endif
|
||||
#undef DEBUGTMP
|
||||
|
||||
///==== AMDGPU Functions ====///
|
||||
|
||||
bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
|
||||
SDValue& Offset) {
|
||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if (Addr.getOpcode() == ISD::ADD) {
|
||||
bool Match = false;
|
||||
|
||||
// Find the base ptr and the offset
|
||||
for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
|
||||
SDValue Arg = Addr.getOperand(i);
|
||||
ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
|
||||
// This arg isn't a constant so it must be the base PTR.
|
||||
if (!OffsetNode) {
|
||||
Base = Addr.getOperand(i);
|
||||
continue;
|
||||
}
|
||||
// Check if the constant argument fits in 8-bits. The offset is in bytes
|
||||
// so we need to convert it to dwords.
|
||||
if (isInt<8>(OffsetNode->getZExtValue() >> 2)) {
|
||||
Match = true;
|
||||
Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
|
||||
MVT::i32);
|
||||
}
|
||||
}
|
||||
return Match;
|
||||
}
|
||||
|
||||
// Default case, no offset
|
||||
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
|
||||
SDValue &Offset)
|
||||
{
|
||||
ConstantSDNode * IMMOffset;
|
||||
|
||||
if (Addr.getOpcode() == ISD::ADD
|
||||
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
|
||||
&& isInt<16>(IMMOffset->getZExtValue())) {
|
||||
|
||||
Base = Addr.getOperand(0);
|
||||
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
|
||||
return true;
|
||||
// If the pointer address is constant, we can move it to the offset field.
|
||||
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
|
||||
&& isInt<16>(IMMOffset->getZExtValue())) {
|
||||
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
|
||||
CurDAG->getEntryNode().getDebugLoc(),
|
||||
AMDGPU::ZERO, MVT::i32);
|
||||
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Default case, no offset
|
||||
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
|
||||
SDValue& Offset) {
|
||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
Addr.getOpcode() == ISD::TargetGlobalAddress ||
|
||||
Addr.getOpcode() != ISD::ADD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Base = Addr.getOperand(0);
|
||||
Offset = Addr.getOperand(1);
|
||||
|
||||
return false;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,203 +0,0 @@
|
|||
//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the interfaces that AMDIL uses to lower LLVM code into a
|
||||
// selection DAG.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDIL_ISELLOWERING_H_
|
||||
#define AMDIL_ISELLOWERING_H_
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
namespace AMDILISD
|
||||
{
|
||||
enum
|
||||
{
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||||
CMOVLOG, // 32bit FP Conditional move logical instruction
|
||||
MAD, // 32bit Fused Multiply Add instruction
|
||||
VBUILD, // scalar to vector mov instruction
|
||||
CALL, // Function call based on a single integer
|
||||
SELECT_CC, // Select the correct conditional instruction
|
||||
UMUL, // 32bit unsigned multiplication
|
||||
DIV_INF, // Divide with infinity returned on zero divisor
|
||||
CMP,
|
||||
IL_CC_I_GT,
|
||||
IL_CC_I_LT,
|
||||
IL_CC_I_GE,
|
||||
IL_CC_I_LE,
|
||||
IL_CC_I_EQ,
|
||||
IL_CC_I_NE,
|
||||
RET_FLAG,
|
||||
BRANCH_COND,
|
||||
LAST_ISD_NUMBER
|
||||
};
|
||||
} // AMDILISD
|
||||
|
||||
class MachineBasicBlock;
|
||||
class MachineInstr;
|
||||
class DebugLoc;
|
||||
class TargetInstrInfo;
|
||||
|
||||
class AMDILTargetLowering : public TargetLowering
|
||||
{
|
||||
public:
|
||||
AMDILTargetLowering(TargetMachine &TM);
|
||||
|
||||
virtual SDValue
|
||||
LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
/// computeMaskedBitsForTargetNode - Determine which of
|
||||
/// the bits specified
|
||||
/// in Mask are known to be either zero or one and return them in
|
||||
/// the
|
||||
/// KnownZero/KnownOne bitsets.
|
||||
virtual void
|
||||
computeMaskedBitsForTargetNode(
|
||||
const SDValue Op,
|
||||
APInt &KnownZero,
|
||||
APInt &KnownOne,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0
|
||||
) const;
|
||||
|
||||
virtual bool
|
||||
getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
const CallInst &I, unsigned Intrinsic) const;
|
||||
virtual const char*
|
||||
getTargetNodeName(
|
||||
unsigned Opcode
|
||||
) const;
|
||||
// We want to mark f32/f64 floating point values as
|
||||
// legal
|
||||
bool
|
||||
isFPImmLegal(const APFloat &Imm, EVT VT) const;
|
||||
// We don't want to shrink f64/f32 constants because
|
||||
// they both take up the same amount of space and
|
||||
// we don't want to use a f2d instruction.
|
||||
bool ShouldShrinkFPConstant(EVT VT) const;
|
||||
|
||||
/// getFunctionAlignment - Return the Log2 alignment of this
|
||||
/// function.
|
||||
virtual unsigned int
|
||||
getFunctionAlignment(const Function *F) const;
|
||||
|
||||
private:
|
||||
CCAssignFn*
|
||||
CCAssignFnForNode(unsigned int CC) const;
|
||||
|
||||
SDValue LowerCallResult(SDValue Chain,
|
||||
SDValue InFlag,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc dl,
|
||||
SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
SDValue LowerMemArgument(SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
|
||||
DebugLoc dl, SelectionDAG &DAG,
|
||||
const CCValAssign &VA, MachineFrameInfo *MFI,
|
||||
unsigned i) const;
|
||||
|
||||
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
|
||||
SDValue Arg,
|
||||
DebugLoc dl, SelectionDAG &DAG,
|
||||
const CCValAssign &VA,
|
||||
ISD::ArgFlagsTy Flags) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerFormalArguments(SDValue Chain,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerCall(CallLoweringInfo &CLI,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerReturn(SDValue Chain,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
DebugLoc dl, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerSREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
EVT
|
||||
genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
|
||||
|
||||
SDValue
|
||||
LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue
|
||||
LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue
|
||||
LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
}; // AMDILTargetLowering
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // AMDIL_ISELLOWERING_H_
|
|
@ -1,508 +0,0 @@
|
|||
//===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the AMDIL implementation of the TargetInstrInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDILInstrInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILISelLowering.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/Instructions.h"
|
||||
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDILInstrInfo::AMDILInstrInfo(TargetMachine &tm)
|
||||
: AMDILGenInstrInfo(),
|
||||
RI(tm, *this) {
|
||||
}
|
||||
|
||||
const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
|
||||
return RI;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineInstr *
|
||||
AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
LiveVariables *LV) const {
|
||||
// TODO: Implement this function
|
||||
return NULL;
|
||||
}
|
||||
bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
||||
MachineBasicBlock &MBB) const {
|
||||
while (iter != MBB.end()) {
|
||||
switch (iter->getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
|
||||
case AMDGPU::BRANCH:
|
||||
return true;
|
||||
};
|
||||
++iter;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const {
|
||||
bool retVal = true;
|
||||
return retVal;
|
||||
MachineBasicBlock::iterator iter = MBB.begin();
|
||||
if (!getNextBranchInstr(iter, MBB)) {
|
||||
retVal = false;
|
||||
} else {
|
||||
MachineInstr *firstBranch = iter;
|
||||
if (!getNextBranchInstr(++iter, MBB)) {
|
||||
if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
|
||||
TBB = firstBranch->getOperand(0).getMBB();
|
||||
firstBranch->eraseFromParent();
|
||||
retVal = false;
|
||||
} else {
|
||||
TBB = firstBranch->getOperand(0).getMBB();
|
||||
FBB = *(++MBB.succ_begin());
|
||||
if (FBB == TBB) {
|
||||
FBB = *(MBB.succ_begin());
|
||||
}
|
||||
Cond.push_back(firstBranch->getOperand(1));
|
||||
retVal = false;
|
||||
}
|
||||
} else {
|
||||
MachineInstr *secondBranch = iter;
|
||||
if (!getNextBranchInstr(++iter, MBB)) {
|
||||
if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
|
||||
TBB = firstBranch->getOperand(0).getMBB();
|
||||
Cond.push_back(firstBranch->getOperand(1));
|
||||
FBB = secondBranch->getOperand(0).getMBB();
|
||||
secondBranch->eraseFromParent();
|
||||
retVal = false;
|
||||
} else {
|
||||
assert(0 && "Should not have two consecutive conditional branches");
|
||||
}
|
||||
} else {
|
||||
MBB.getParent()->viewCFG();
|
||||
assert(0 && "Should not have three branch instructions in"
|
||||
" a single basic block");
|
||||
retVal = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
|
||||
const MachineInstr *MI = op.getParent();
|
||||
|
||||
switch (MI->getDesc().OpInfo->RegClass) {
|
||||
default: // FIXME: fallthrough??
|
||||
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
|
||||
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
|
||||
};
|
||||
}
|
||||
|
||||
unsigned int
|
||||
AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const
|
||||
{
|
||||
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
||||
for (unsigned int x = 0; x < Cond.size(); ++x) {
|
||||
Cond[x].getParent()->dump();
|
||||
}
|
||||
if (FBB == 0) {
|
||||
if (Cond.empty()) {
|
||||
BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
|
||||
} else {
|
||||
BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
|
||||
.addMBB(TBB).addReg(Cond[0].getReg());
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
|
||||
.addMBB(TBB).addReg(Cond[0].getReg());
|
||||
BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
|
||||
}
|
||||
assert(0 && "Inserting two branches not supported");
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock::iterator I = MBB.end();
|
||||
if (I == MBB.begin()) {
|
||||
return 0;
|
||||
}
|
||||
--I;
|
||||
switch (I->getOpcode()) {
|
||||
default:
|
||||
return 0;
|
||||
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
|
||||
case AMDGPU::BRANCH:
|
||||
I->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
I = MBB.end();
|
||||
|
||||
if (I == MBB.begin()) {
|
||||
return 1;
|
||||
}
|
||||
--I;
|
||||
switch (I->getOpcode()) {
|
||||
// FIXME: only one case??
|
||||
default:
|
||||
return 1;
|
||||
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
|
||||
I->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
|
||||
MachineBasicBlock::iterator tmp = MBB->end();
|
||||
if (!MBB->size()) {
|
||||
return MBB->end();
|
||||
}
|
||||
while (--tmp) {
|
||||
if (tmp->getOpcode() == AMDGPU::ENDLOOP
|
||||
|| tmp->getOpcode() == AMDGPU::ENDIF
|
||||
|| tmp->getOpcode() == AMDGPU::ELSE) {
|
||||
if (tmp == MBB->begin()) {
|
||||
return tmp;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
return ++tmp;
|
||||
}
|
||||
}
|
||||
return MBB->end();
|
||||
}
|
||||
|
||||
void
|
||||
AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, bool isKill,
|
||||
int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
unsigned int Opc = 0;
|
||||
// MachineInstr *curMI = MI;
|
||||
MachineFunction &MF = *(MBB.getParent());
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
|
||||
DebugLoc DL;
|
||||
switch (RC->getID()) {
|
||||
case AMDGPU::GPRF32RegClassID:
|
||||
Opc = AMDGPU::PRIVATESTORE_f32;
|
||||
break;
|
||||
case AMDGPU::GPRI32RegClassID:
|
||||
Opc = AMDGPU::PRIVATESTORE_i32;
|
||||
break;
|
||||
}
|
||||
if (MI != MBB.end()) DL = MI->getDebugLoc();
|
||||
MachineMemOperand *MMO =
|
||||
new MachineMemOperand(
|
||||
MachinePointerInfo::getFixedStack(FrameIndex),
|
||||
MachineMemOperand::MOLoad,
|
||||
MFI.getObjectSize(FrameIndex),
|
||||
MFI.getObjectAlignment(FrameIndex));
|
||||
if (MI != MBB.end()) {
|
||||
DL = MI->getDebugLoc();
|
||||
}
|
||||
BuildMI(MBB, MI, DL, get(Opc))
|
||||
.addReg(SrcReg, getKillRegState(isKill))
|
||||
.addFrameIndex(FrameIndex)
|
||||
.addMemOperand(MMO)
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
void
|
||||
AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned DestReg, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
unsigned int Opc = 0;
|
||||
MachineFunction &MF = *(MBB.getParent());
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
DebugLoc DL;
|
||||
switch (RC->getID()) {
|
||||
case AMDGPU::GPRF32RegClassID:
|
||||
Opc = AMDGPU::PRIVATELOAD_f32;
|
||||
break;
|
||||
case AMDGPU::GPRI32RegClassID:
|
||||
Opc = AMDGPU::PRIVATELOAD_i32;
|
||||
break;
|
||||
}
|
||||
|
||||
MachineMemOperand *MMO =
|
||||
new MachineMemOperand(
|
||||
MachinePointerInfo::getFixedStack(FrameIndex),
|
||||
MachineMemOperand::MOLoad,
|
||||
MFI.getObjectSize(FrameIndex),
|
||||
MFI.getObjectAlignment(FrameIndex));
|
||||
if (MI != MBB.end()) {
|
||||
DL = MI->getDebugLoc();
|
||||
}
|
||||
BuildMI(MBB, MI, DL, get(Opc))
|
||||
.addReg(DestReg, RegState::Define)
|
||||
.addFrameIndex(FrameIndex)
|
||||
.addMemOperand(MMO)
|
||||
.addImm(0);
|
||||
}
|
||||
MachineInstr *
|
||||
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
int FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
MachineInstr*
|
||||
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
MachineInstr *LoadMI) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
bool
|
||||
AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops) const
|
||||
{
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
bool
|
||||
AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
||||
unsigned Reg, bool UnfoldLoad,
|
||||
bool UnfoldStore,
|
||||
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||
SmallVectorImpl<SDNode*> &NewNodes) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned
|
||||
AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||
bool UnfoldLoad, bool UnfoldStore,
|
||||
unsigned *LoadRegIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||
int64_t Offset1, int64_t Offset2,
|
||||
unsigned NumLoads) const {
|
||||
assert(Offset2 > Offset1
|
||||
&& "Second offset should be larger than first offset!");
|
||||
// If we have less than 16 loads in a row, and the offsets are within 16,
|
||||
// then schedule together.
|
||||
// TODO: Make the loads schedule near if it fits in a cacheline
|
||||
return (NumLoads < 16 && (Offset2 - Offset1) < 16);
|
||||
}
|
||||
|
||||
bool
|
||||
AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
|
||||
const {
|
||||
// TODO: Implement this function
|
||||
return true;
|
||||
}
|
||||
void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const {
|
||||
// TODO: Implement this function
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
bool
|
||||
AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2)
|
||||
const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const {
|
||||
// TODO: Implement this function
|
||||
return MI->getDesc().isPredicable();
|
||||
}
|
||||
|
||||
bool
|
||||
AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
||||
// TODO: Implement this function
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const {
|
||||
if (strstr(getName(MI->getOpcode()), "LOADCONST")) {
|
||||
return false;
|
||||
}
|
||||
return strstr(getName(MI->getOpcode()), "LOAD");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "EXTLOAD");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "SEXTLOAD");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "AEXTLOAD");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "ZEXTLOAD");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "STORE");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "TRUNCSTORE");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const {
|
||||
return strstr(getName(MI->getOpcode()), "ATOM");
|
||||
}
|
||||
|
||||
bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const {
|
||||
if (!MI->memoperands_empty()) {
|
||||
for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(),
|
||||
moe = MI->memoperands_end(); mob != moe; ++mob) {
|
||||
// If there is a volatile mem operand, this is a volatile instruction.
|
||||
if ((*mob)->isVolatile()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "GLOBAL");
|
||||
}
|
||||
bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "PRIVATE");
|
||||
}
|
||||
bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "CONSTANT")
|
||||
|| strstr(getName(MI->getOpcode()), "CPOOL");
|
||||
}
|
||||
bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "REGION");
|
||||
}
|
||||
bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "LOCAL");
|
||||
}
|
||||
bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "IMAGE");
|
||||
}
|
||||
bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "APPEND");
|
||||
}
|
||||
bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "ATOM_R");
|
||||
}
|
||||
bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "ATOM_L");
|
||||
}
|
||||
bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "ATOM_G")
|
||||
|| isArenaAtomic(MI);
|
||||
}
|
||||
bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const
|
||||
{
|
||||
return strstr(getName(MI->getOpcode()), "ATOM_A");
|
||||
}
|
|
@ -1,160 +0,0 @@
|
|||
//===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the AMDIL implementation of the TargetInstrInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDILINSTRUCTIONINFO_H_
|
||||
#define AMDILINSTRUCTIONINFO_H_
|
||||
|
||||
#include "AMDILRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
#define GET_INSTRINFO_HEADER
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
// AMDIL - This namespace holds all of the target specific flags that
|
||||
// instruction info tracks.
|
||||
//
|
||||
//class AMDILTargetMachine;
|
||||
class AMDILInstrInfo : public AMDILGenInstrInfo {
|
||||
private:
|
||||
const AMDILRegisterInfo RI;
|
||||
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
||||
MachineBasicBlock &MBB) const;
|
||||
unsigned int getBranchInstr(const MachineOperand &op) const;
|
||||
public:
|
||||
explicit AMDILInstrInfo(TargetMachine &tm);
|
||||
|
||||
// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
|
||||
// such, whenever a client has an instance of instruction info, it should
|
||||
// always be able to get register info as well (through this method).
|
||||
const AMDILRegisterInfo &getRegisterInfo() const;
|
||||
|
||||
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DstReg, unsigned &SubIdx) const;
|
||||
|
||||
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const;
|
||||
bool hasLoadFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const;
|
||||
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const;
|
||||
bool hasStoreFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const;
|
||||
|
||||
MachineInstr *
|
||||
convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
LiveVariables *LV) const;
|
||||
|
||||
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const;
|
||||
|
||||
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
||||
|
||||
unsigned
|
||||
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const;
|
||||
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const = 0;
|
||||
|
||||
void storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, bool isKill, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned DestReg, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
protected:
|
||||
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
int FrameIndex) const;
|
||||
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
MachineInstr *LoadMI) const;
|
||||
public:
|
||||
bool canFoldMemoryOperand(const MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops) const;
|
||||
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
||||
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
|
||||
SmallVectorImpl<MachineInstr *> &NewMIs) const;
|
||||
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||
SmallVectorImpl<SDNode *> &NewNodes) const;
|
||||
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||
bool UnfoldLoad, bool UnfoldStore,
|
||||
unsigned *LoadRegIndex = 0) const;
|
||||
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||
int64_t Offset1, int64_t Offset2,
|
||||
unsigned NumLoads) const;
|
||||
|
||||
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
||||
void insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const;
|
||||
bool isPredicated(const MachineInstr *MI) const;
|
||||
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
||||
bool DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const;
|
||||
bool isPredicable(MachineInstr *MI) const;
|
||||
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
|
||||
|
||||
// Helper functions that check the opcode for status information
|
||||
bool isLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isSExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isZExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isStoreInst(llvm::MachineInstr *MI) const;
|
||||
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
|
||||
bool isAtomicInst(llvm::MachineInstr *MI) const;
|
||||
bool isVolatileInst(llvm::MachineInstr *MI) const;
|
||||
bool isGlobalInst(llvm::MachineInstr *MI) const;
|
||||
bool isPrivateInst(llvm::MachineInstr *MI) const;
|
||||
bool isConstantInst(llvm::MachineInstr *MI) const;
|
||||
bool isRegionInst(llvm::MachineInstr *MI) const;
|
||||
bool isLocalInst(llvm::MachineInstr *MI) const;
|
||||
bool isImageInst(llvm::MachineInstr *MI) const;
|
||||
bool isAppendInst(llvm::MachineInstr *MI) const;
|
||||
bool isRegionAtomic(llvm::MachineInstr *MI) const;
|
||||
bool isLocalAtomic(llvm::MachineInstr *MI) const;
|
||||
bool isGlobalAtomic(llvm::MachineInstr *MI) const;
|
||||
bool isArenaAtomic(llvm::MachineInstr *MI) const;
|
||||
|
||||
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const = 0;
|
||||
|
||||
virtual unsigned getIEQOpcode() const = 0;
|
||||
|
||||
virtual bool isMov(unsigned Opcode) const = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // AMDILINSTRINFO_H_
|
|
@ -1,108 +0,0 @@
|
|||
//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file describes the AMDIL instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDIL Instruction Predicate Definitions
//
// Each predicate wraps a C++ expression (adjacent string literals are pasted
// together by TableGen) that is evaluated against the current Subtarget at
// instruction-selection time.
//===----------------------------------------------------------------------===//

// True if the hardware supports double-precision divide.
def HasHWDDiv : Predicate<"Subtarget.device()"
                          "->getGeneration() > AMDILDeviceInfo::HD4XXX && "
                          "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;

// True if the hardware supports double, but double-precision divide must be
// expanded in software.
def HasSWDDiv : Predicate<"Subtarget.device()"
                          "->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
                          "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;

// True if the hardware supports 24-bit signed math ops.  Otherwise a software
// expansion to 32-bit math ops is used instead.
def HasHWSign24Bit : Predicate<"Subtarget.device()"
                               "->getGeneration() > AMDILDeviceInfo::HD5XXX">;

// Whether 64-bit integer operations run in hardware or in software expansion.
def HasHW64Bit : Predicate<"Subtarget.device()"
                           "->usesHardware(AMDILDeviceInfo::LongOps)">;
def HasSW64Bit : Predicate<"Subtarget.device()"
                           "->usesSoftware(AMDILDeviceInfo::LongOps)">;

// True if the timer register is supported.
def HasTmrRegister : Predicate<"Subtarget.device()"
                               "->isSupported(AMDILDeviceInfo::TmrReg)">;
// True if the device is at least the Evergreen (HD5XXX) series.
def HasDeviceIDInst : Predicate<"Subtarget.device()"
                                "->getGeneration() >= AMDILDeviceInfo::HD5XXX">;

// True if the region (GDS) address space is implemented in hardware.
def hasRegionAS : Predicate<"Subtarget.device()"
                            "->usesHardware(AMDILDeviceInfo::RegionMem)">;

// True if there is no region address space at all.  NOTE(review): this tests
// isSupported, not usesHardware, so it is not the exact negation of
// hasRegionAS -- confirm this asymmetry is intended.
def noRegionAS : Predicate<"!Subtarget.device()"
                           "->isSupported(AMDILDeviceInfo::RegionMem)">;


// Whether 64-bit multiply is available in the IL (needs SC >= 139 and an
// Evergreen-or-newer device) or must be emulated.
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
                           ">= CAL_VERSION_SC_139"
                           "&& Subtarget.device()"
                           "->getGeneration() >="
                           "AMDILDeviceInfo::HD5XXX">;
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
                           "< CAL_VERSION_SC_139">;
// Whether 64-bit div/mod is available in the IL or must be emulated.
def HasHW64DivMod : Predicate<"Subtarget.device()"
                              "->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
def HasSW64DivMod : Predicate<"Subtarget.device()"
                              "->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">;

// Pointer width of the target.
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
include "AMDILOperands.td"

//===--------------------------------------------------------------------===//
// Custom Selection DAG Type Profiles
//===--------------------------------------------------------------------===//
include "AMDILProfiles.td"

//===--------------------------------------------------------------------===//
// Custom Selection DAG Nodes
//===--------------------------------------------------------------------===//
include "AMDILNodes.td"

//===--------------------------------------------------------------------===//
// Custom Pattern DAG Nodes
//===--------------------------------------------------------------------===//
include "AMDILPatterns.td"

//===----------------------------------------------------------------------===//
// Instruction format classes
//===----------------------------------------------------------------------===//
include "AMDILFormats.td"

//===--------------------------------------------------------------------===//
// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
include "AMDILMultiClass.td"

//===--------------------------------------------------------------------===//
// Intrinsics support
//===--------------------------------------------------------------------===//
include "AMDILIntrinsics.td"

//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
include "AMDILInstructions.td"
|
@ -1,143 +0,0 @@
|
|||
//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

// Memory instructions are only defined for the 32-bit pointer configuration,
// and are codegen-only pseudos (expanded/lowered later, never parsed).
let Predicates = [Has32BitPtr] in {
let isCodeGenOnly=1 in {
//===----------------------------------------------------------------------===//
// Store Memory Operations
//===----------------------------------------------------------------------===//
defm GLOBALTRUNCSTORE : GTRUNCSTORE<"!global trunc store">;
defm LOCALTRUNCSTORE : LTRUNCSTORE<"!local trunc store">;
defm LOCALSTORE : STORE<"!local store" , local_store>;
defm PRIVATETRUNCSTORE : PTRUNCSTORE<"!private trunc store">;
defm PRIVATESTORE : STORE<"!private store" , private_store>;
defm REGIONTRUNCSTORE : RTRUNCSTORE<"!region trunc store">;
defm REGIONSTORE : STORE<"!region hw store" , region_store>;


//===---------------------------------------------------------------------===//
// Load Memory Operations
//
// For each address space: plain, zero-extending, sign-extending and
// any-extending loads (constant-pool loads included).
//===---------------------------------------------------------------------===//
defm GLOBALZEXTLOAD : LOAD<"!global zext load" , global_zext_load>;
defm GLOBALSEXTLOAD : LOAD<"!global sext load" , global_sext_load>;
defm GLOBALAEXTLOAD : LOAD<"!global aext load" , global_aext_load>;
defm PRIVATELOAD : LOAD<"!private load" , private_load>;
defm PRIVATEZEXTLOAD : LOAD<"!private zext load" , private_zext_load>;
defm PRIVATESEXTLOAD : LOAD<"!private sext load" , private_sext_load>;
defm PRIVATEAEXTLOAD : LOAD<"!private aext load" , private_aext_load>;
defm CPOOLLOAD : LOAD<"!constant pool load" , cp_load>;
defm CPOOLZEXTLOAD : LOAD<"!constant pool zext load", cp_zext_load>;
defm CPOOLSEXTLOAD : LOAD<"!constant pool sext load", cp_sext_load>;
defm CPOOLAEXTLOAD : LOAD<"!constant aext pool load", cp_aext_load>;
defm CONSTANTLOAD : LOAD<"!constant load" , constant_load>;
defm CONSTANTZEXTLOAD : LOAD<"!constant zext load" , constant_zext_load>;
defm CONSTANTSEXTLOAD : LOAD<"!constant sext load" , constant_sext_load>;
defm CONSTANTAEXTLOAD : LOAD<"!constant aext load" , constant_aext_load>;
defm LOCALLOAD : LOAD<"!local load" , local_load>;
defm LOCALZEXTLOAD : LOAD<"!local zext load" , local_zext_load>;
defm LOCALSEXTLOAD : LOAD<"!local sext load" , local_sext_load>;
defm LOCALAEXTLOAD : LOAD<"!local aext load" , local_aext_load>;
defm REGIONLOAD : LOAD<"!region load" , region_load>;
defm REGIONZEXTLOAD : LOAD<"!region zext load" , region_zext_load>;
defm REGIONSEXTLOAD : LOAD<"!region sext load" , region_sext_load>;
defm REGIONAEXTLOAD : LOAD<"!region aext load" , region_aext_load>;
}
}

//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// seperate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1 in {
  def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
      "; Pseudo unconditional branch instruction",
      [(br bb:$target)]>;
  defm BRANCH_COND : BranchConditional<IL_brcond>;
}
//===---------------------------------------------------------------------===//
// return instructions
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
  def RETURN : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
      IL_OP_RET.Text, [(IL_retflag)]>;
}

//===---------------------------------------------------------------------===//
// Handle a function call
//
// R1-R10 are clobbered by a call; R11-R20 carry incoming values.
//===---------------------------------------------------------------------===//
let isCall = 1,
    Defs = [
      R1, R2, R3, R4, R5, R6, R7, R8, R9, R10
    ]
    ,
    Uses = [
      R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
    ]
    in {
  def CALL : UnaryOpNoRet<IL_OP_CALL, (outs),
      (ins calltarget:$dst),
      !strconcat(IL_OP_CALL.Text, " $dst"), []>;
}


//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
  def SWITCH : ILFormat<IL_OP_SWITCH, (outs), (ins GPRI32:$src),
      !strconcat(IL_OP_SWITCH.Text, " $src"), []>;
  def CASE : ILFormat<IL_OP_CASE, (outs), (ins GPRI32:$src),
      !strconcat(IL_OP_CASE.Text, " $src"), []>;
  def BREAK : ILFormat<IL_OP_BREAK, (outs), (ins),
      IL_OP_BREAK.Text, []>;
  def CONTINUE : ILFormat<IL_OP_CONTINUE, (outs), (ins),
      IL_OP_CONTINUE.Text, []>;
  def DEFAULT : ILFormat<IL_OP_DEFAULT, (outs), (ins),
      IL_OP_DEFAULT.Text, []>;
  def ELSE : ILFormat<IL_OP_ELSE, (outs), (ins),
      IL_OP_ELSE.Text, []>;
  def ENDSWITCH : ILFormat<IL_OP_ENDSWITCH, (outs), (ins),
      IL_OP_ENDSWITCH.Text, []>;
  def ENDMAIN : ILFormat<IL_OP_ENDMAIN, (outs), (ins),
      IL_OP_ENDMAIN.Text, []>;
  def END : ILFormat<IL_OP_END, (outs), (ins),
      IL_OP_END.Text, []>;
  def ENDFUNC : ILFormat<IL_OP_ENDFUNC, (outs), (ins),
      IL_OP_ENDFUNC.Text, []>;
  def ENDIF : ILFormat<IL_OP_ENDIF, (outs), (ins),
      IL_OP_ENDIF.Text, []>;
  def WHILELOOP : ILFormat<IL_OP_WHILE, (outs), (ins),
      IL_OP_WHILE.Text, []>;
  def ENDLOOP : ILFormat<IL_OP_ENDLOOP, (outs), (ins),
      IL_OP_ENDLOOP.Text, []>;
  def FUNC : ILFormat<IL_OP_FUNC, (outs), (ins),
      IL_OP_FUNC.Text, []>;
  def RETDYN : ILFormat<IL_OP_RET_DYN, (outs), (ins),
      IL_OP_RET_DYN.Text, []>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ : BranchInstr<IL_OP_IF_LOGICALNZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALZ : BranchInstr<IL_OP_IF_LOGICALZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALNZ : BranchInstr<IL_OP_BREAK_LOGICALNZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALZ : BranchInstr<IL_OP_BREAK_LOGICALZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALNZ : BranchInstr<IL_OP_CONTINUE_LOGICALNZ>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALZ : BranchInstr<IL_OP_CONTINUE_LOGICALZ>;
  defm IFC : BranchInstr2<IL_OP_IFC>;
  defm BREAKC : BranchInstr2<IL_OP_BREAKC>;
  defm CONTINUEC : BranchInstr2<IL_OP_CONTINUEC>;
}
// Trap lowers to a NOP in AMDIL.
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
  def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
      IL_OP_NOP.Text, [(trap)]>;
}
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the AMDIL Implementation of the IntrinsicInfo class.
|
||||
//
|
||||
//===-----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Module.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
||||
#include "AMDGPUGenIntrinsics.inc"
|
||||
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
||||
|
||||
// Constructor.  The TargetMachine argument is accepted for interface
// symmetry with other backends but is not stored or used.
AMDILIntrinsicInfo::AMDILIntrinsicInfo(TargetMachine *tm)
  : TargetIntrinsicInfo()
{
}
|
||||
|
||||
// Return the name of the AMDIL intrinsic with the given (global) intrinsic
// ID, or an empty string when IntrID denotes a generic LLVM intrinsic.
// Tys/numTys are part of the TargetIntrinsicInfo interface but are ignored:
// AMDIL intrinsics are not overloaded.
std::string
AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
    unsigned int numTys) const
{
  // Name table generated by tablegen; indexed by (IntrID - num_intrinsics).
  static const char* const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
  };

  //assert(!isOverloaded(IntrID)
  //&& "AMDIL Intrinsics are not overloaded");
  if (IntrID < Intrinsic::num_intrinsics) {
    // Not a target intrinsic.  The original code did 'return 0;', which
    // constructs the returned std::string from a null const char* --
    // undefined behavior.  Return an empty string instead.
    return std::string();
  }
  assert(IntrID < AMDGPUIntrinsic::num_AMDIL_intrinsics
      && "Invalid intrinsic ID");

  return std::string(names[IntrID - Intrinsic::num_intrinsics]);
}
|
||||
|
||||
// Map an external function name (e.g. "__amdil_mad") to its intrinsic ID.
// Returns 0 (== Intrinsic::not_intrinsic) when the name is not an AMDIL
// builtin.  Len is unused here; the tablegen-generated recognizer works on
// the NUL-terminated Name.
unsigned int
AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
{
// Pulls in getIntrinsicForGCCBuiltin(), generated by tablegen.
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
  AMDGPUIntrinsic::ID IntrinsicID
    = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
  // Recognize names registered via GCCBuiltin<...> under target prefix AMDIL.
  IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);

  if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
    return IntrinsicID;
  }
  return 0;
}
|
||||
|
||||
// Whether the intrinsic with the given id is overloaded.  The entire body is
// supplied by the tablegen-generated overload table; presumably the included
// code returns for every valid id -- TODO(review): confirm the generated
// table covers all ids so control cannot fall off the end.
bool
AMDILIntrinsicInfo::isOverloaded(unsigned id) const
{
  // Overload Table
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
|
||||
|
||||
/// This defines the "getAttributes(ID id)" method.
/// (The whole function definition is emitted by tablegen into the .inc.)
#define GET_INTRINSIC_ATTRIBUTES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
|
||||
// Materialize (or find) the declaration of intrinsic IntrID in module M.
// Not implemented for AMDIL: always asserts.  Tys/numTys are ignored.
Function*
AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
    Type **Tys,
    unsigned numTys) const
{
  //Silence a warning
  AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
  (void)List;
  assert(!"Not implemented");
  // In NDEBUG builds the assert vanishes; without this return, control would
  // flow off the end of a value-returning function (undefined behavior).
  return 0;
}
|
|
@ -1,47 +0,0 @@
|
|||
//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the AMDIL Implementation of the Intrinsic Info class.
|
||||
//
|
||||
//===-----------------------------------------------------------------------===//
|
||||
// NOTE: the original guard macro (_AMDIL_INTRINSICS_H_) began with an
// underscore followed by an uppercase letter, an identifier reserved to the
// implementation; renamed to a conforming guard.
#ifndef AMDILINTRINSICINFO_H
#define AMDILINTRINSICINFO_H

#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

namespace llvm {
class TargetMachine;

namespace AMDGPUIntrinsic {
// Enumerates all AMDIL target intrinsics, numbered directly after the
// generic LLVM intrinsics (values come from the tablegen-generated .inc).
enum ID {
  last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ENUM_VALUES
  , num_AMDIL_intrinsics
};

}

// TargetIntrinsicInfo implementation for AMDIL: maps intrinsic IDs to names
// and back.  getDeclaration is not implemented (asserts when called).
class AMDILIntrinsicInfo : public TargetIntrinsicInfo {
public:
  AMDILIntrinsicInfo(TargetMachine *tm);
  // Name of the intrinsic with the given global ID; Tys/numTys are ignored
  // (AMDIL intrinsics are not overloaded).
  std::string getName(unsigned int IntrId, Type **Tys = 0,
      unsigned int numTys = 0) const;
  // Map an external function name to an intrinsic ID (0 if none matches).
  unsigned int lookupName(const char *Name, unsigned int Len) const;
  bool isOverloaded(unsigned int IID) const;
  // Not implemented; asserts when called.
  Function *getDeclaration(Module *M, unsigned int ID,
      Type **Tys = 0,
      unsigned int numTys = 0) const;
}; // AMDILIntrinsicInfo
}

#endif // AMDILINTRINSICINFO_H
|
||||
|
|
@ -1,705 +0,0 @@
|
|||
//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file defines all of the amdil-specific intrinsics
//
//===---------------------------------------------------------------===//

// All intrinsics below are in the AMDIL target namespace and are mapped to
// builtins via their GCCBuiltin names.  (The closing brace of this `let`
// lies beyond this excerpt.)
let TargetPrefix = "AMDIL", isTarget = 1 in {
//------------- Synchronization Functions - OpenCL 6.11.9 --------------------//
// Memory fences, optionally restricted by address space
// (global/local/region) and by direction (read-only / write-only).
def int_AMDIL_fence : GCCBuiltin<"mem_fence">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_global : GCCBuiltin<"mem_fence_global">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_local : GCCBuiltin<"mem_fence_local">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_region : GCCBuiltin<"mem_fence_region">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_read_only : GCCBuiltin<"read_mem_fence">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_read_only_global : GCCBuiltin<"read_mem_fence_global">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_read_only_local : GCCBuiltin<"read_mem_fence_local">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_read_only_region : GCCBuiltin<"read_mem_fence_region">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_write_only : GCCBuiltin<"write_mem_fence">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_write_only_global : GCCBuiltin<"write_mem_fence_global">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_write_only_local : GCCBuiltin<"write_mem_fence_local">,
    UnaryIntNoRetInt;
def int_AMDIL_fence_write_only_region : GCCBuiltin<"write_mem_fence_region">,
    UnaryIntNoRetInt;

// Early kernel exit.
def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">,
    UnaryIntNoRetInt;
|
||||
|
||||
// Conditional move and absolute value.
def int_AMDIL_cmov_logical : GCCBuiltin<"__amdil_cmov_logical">,
    TernaryIntInt;
def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;

// Bit-field manipulation and bit-scan operations.
def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
    TernaryIntInt;
def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
    TernaryIntInt;
def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
    UnaryIntInt;
def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
    UnaryIntInt;
def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
    UnaryIntInt;
def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
    UnaryIntInt;
def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
    UnaryIntInt;
def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
    TernaryIntInt;
def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
    TernaryIntInt;
def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
    QuaternaryIntInt;
def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
    TernaryIntInt;
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
    BinaryIntInt;

// Multiply / multiply-add variants (full-width, high-half and 24-bit).
def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
    TernaryIntInt;
def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
    TernaryIntInt;
def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
    TernaryIntFloat;
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
    BinaryIntInt;
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
    BinaryIntInt;
def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
    BinaryIntInt;
def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
    BinaryIntInt;
def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
    BinaryIntInt;
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
    BinaryIntInt;
def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
    TernaryIntInt;
def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
    TernaryIntInt;

// Carry/borrow propagation for extended-precision arithmetic.
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
    BinaryIntInt;
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
    BinaryIntInt;

// Min/max (signed, unsigned, float).
def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
    BinaryIntInt;
def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
    BinaryIntInt;
def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
    BinaryIntFloat;
def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
    BinaryIntInt;
def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
    BinaryIntInt;
def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
    BinaryIntFloat;

// Packed-media operations (lerp and sum-of-absolute-differences).
def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
    TernaryIntInt;
def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
    TernaryIntInt;
def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
    TernaryIntInt;
|
||||
// Rounding / range-reduction helpers.
def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
    UnaryIntFloat;
def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
    TernaryIntFloat;
def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
    UnaryIntFloat;
def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
    UnaryIntFloat;
def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
    UnaryIntFloat;
def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">,
    UnaryIntFloat;
def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
    UnaryIntFloat;

// Trigonometric functions (the *_vec forms take vector operands).
def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
    UnaryIntFloat;
def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
    UnaryIntFloat;
def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
    UnaryIntFloat;
def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
    UnaryIntFloat;
def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
    UnaryIntFloat;
def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
    UnaryIntFloat;
def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
    UnaryIntFloat;
def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
    UnaryIntFloat;

// Power, divide, exponential and logarithm.
def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
    UnaryIntFloat;
def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
    UnaryIntFloat;
def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
    UnaryIntFloat;
def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
    UnaryIntFloat;
def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
    UnaryIntFloat;
def int_AMDIL_log : GCCBuiltin<"__amdil_log">,
    UnaryIntFloat;
def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
    UnaryIntFloat;
def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
    UnaryIntFloat;
def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
    UnaryIntFloat;
def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
    TernaryIntFloat;
def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
    UnaryIntFloat;
def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
    UnaryIntFloat;
def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
    UnaryIntFloat;
def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
    TernaryIntFloat;
// 4-wide sum-of-absolute-differences with explicit signature.
def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
    Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
        llvm_v4i32_ty, llvm_i32_ty], []>;

// Double-precision helpers.
def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
    Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
    Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
    Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;

// Half-float and float-to-int conversions with explicit rounding modes.
def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
    ConvertIntITOF;
def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
    ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
    ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
    ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
    ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
    ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
    ConvertIntFTOI;

// Media pack/unpack conversions.
def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
    Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
    ConvertIntITOF;
def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
    ConvertIntITOF;
def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
    ConvertIntITOF;
def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
    ConvertIntITOF;

// Dot products (2/3/4 components; dp2_add fuses a scalar add).
def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
    Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
        llvm_v2f32_ty, llvm_float_ty], []>;
def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
    Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
        llvm_v2f32_ty], []>;
def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
    Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
        llvm_v4f32_ty], []>;
def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
    Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
        llvm_v4f32_ty], []>;
|
||||
//===---------------------- Image functions begin ------------------------===//
// Image read/write/query intrinsics for 1D/2D/3D images and array variants.
// The first llvm_ptr_ty operand is the image resource; reads take a sampler
// index and normalized/unnormalized coordinates; info0/info1 query image
// metadata (exact layout determined by the lowering, not visible here).
def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">,
    Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image1d_read_norm : GCCBuiltin<"__amdil_image1d_read_norm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image1d_read_unnorm : GCCBuiltin<"__amdil_image1d_read_unnorm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image1d_info0 : GCCBuiltin<"__amdil_image1d_info0">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
    Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image1d_array_read_norm : GCCBuiltin<"__amdil_image1d_array_read_norm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image1d_array_read_unnorm : GCCBuiltin<"__amdil_image1d_array_read_unnorm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image1d_array_info0 : GCCBuiltin<"__amdil_image1d_array_info0">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image2d_write : GCCBuiltin<"__amdil_image2d_write">,
    Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image2d_read_norm : GCCBuiltin<"__amdil_image2d_read_norm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image2d_read_unnorm : GCCBuiltin<"__amdil_image2d_read_unnorm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image2d_info0 : GCCBuiltin<"__amdil_image2d_info0">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image2d_info1 : GCCBuiltin<"__amdil_image2d_info1">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image2d_array_write : GCCBuiltin<"__amdil_image2d_array_write">,
    Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image2d_array_read_norm : GCCBuiltin<"__amdil_image2d_array_read_norm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image2d_array_read_unnorm : GCCBuiltin<"__amdil_image2d_array_read_unnorm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image2d_array_info0 : GCCBuiltin<"__amdil_image2d_array_info0">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image2d_array_info1 : GCCBuiltin<"__amdil_image2d_array_info1">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

// 3D image writes take a v4i32 coordinate (x, y, z plus padding).
def int_AMDIL_image3d_write : GCCBuiltin<"__amdil_image3d_write">,
    Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image3d_read_norm : GCCBuiltin<"__amdil_image3d_read_norm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image3d_read_unnorm : GCCBuiltin<"__amdil_image3d_read_unnorm">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;

def int_AMDIL_image3d_info0 : GCCBuiltin<"__amdil_image3d_info0">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

def int_AMDIL_image3d_info1 : GCCBuiltin<"__amdil_image3d_info1">,
    Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;

//===---------------------- Image functions end --------------------------===//
|
||||
|
||||
// Append/consume buffer counters (with and without returned value).
def int_AMDIL_append_alloc_i32 : GCCBuiltin<"__amdil_append_alloc">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
def int_AMDIL_append_consume_i32 : GCCBuiltin<"__amdil_append_consume">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
def int_AMDIL_append_alloc_i32_noret : GCCBuiltin<"__amdil_append_alloc_noret">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
def int_AMDIL_append_consume_i32_noret : GCCBuiltin<"__amdil_append_consume_noret">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;

// OpenCL work-item / NDRange queries; each returns all dimensions packed
// into a v4i32 (get_work_dim returns a scalar).
def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_global_size : GCCBuiltin<"__amdil_get_global_size_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">,
    Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">,
    Intrinsic<[llvm_i32_ty], [], []>;
def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">,
    Intrinsic<[llvm_i32_ty], []>;
def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">,
    Intrinsic<[llvm_i32_ty], []>;

/// Intrinsics for atomic instructions with no return value
/// Signed 32 bit integer atomics for global address space
def int_AMDIL_atomic_add_gi32_noret : GCCBuiltin<"__atomic_add_gi32_noret">,
    BinaryAtomicIntNoRet;
def int_AMDIL_atomic_sub_gi32_noret : GCCBuiltin<"__atomic_sub_gi32_noret">,
    BinaryAtomicIntNoRet;
def int_AMDIL_atomic_rsub_gi32_noret : GCCBuiltin<"__atomic_rsub_gi32_noret">,
    BinaryAtomicIntNoRet;
def int_AMDIL_atomic_xchg_gi32_noret : GCCBuiltin<"__atomic_xchg_gi32_noret">,
    BinaryAtomicIntNoRet;
def int_AMDIL_atomic_inc_gi32_noret : GCCBuiltin<"__atomic_inc_gi32_noret">,
    BinaryAtomicIntNoRet;
def int_AMDIL_atomic_dec_gi32_noret : GCCBuiltin<"__atomic_dec_gi32_noret">,
    BinaryAtomicIntNoRet;
// Compare-and-swap takes (ptr-value, compare, swap) -> ternary.
def int_AMDIL_atomic_cmpxchg_gi32_noret : GCCBuiltin<"__atomic_cmpxchg_gi32_noret">,
    TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_min_gi32_noret : GCCBuiltin<"__atomic_min_gi32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_max_gi32_noret : GCCBuiltin<"__atomic_max_gi32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_and_gi32_noret : GCCBuiltin<"__atomic_and_gi32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_or_gi32_noret : GCCBuiltin<"__atomic_or_gi32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
|
||||
|
||||
/// Unsigned 32 bit integer atomics for global address space
|
||||
def int_AMDIL_atomic_add_gu32_noret : GCCBuiltin<"__atomic_add_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_sub_gu32_noret : GCCBuiltin<"__atomic_sub_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_rsub_gu32_noret : GCCBuiltin<"__atomic_rsub_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xchg_gu32_noret : GCCBuiltin<"__atomic_xchg_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_inc_gu32_noret : GCCBuiltin<"__atomic_inc_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_dec_gu32_noret : GCCBuiltin<"__atomic_dec_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_cmpxchg_gu32_noret : GCCBuiltin<"__atomic_cmpxchg_gu32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_min_gu32_noret : GCCBuiltin<"__atomic_min_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_max_gu32_noret : GCCBuiltin<"__atomic_max_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_and_gu32_noret : GCCBuiltin<"__atomic_and_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_or_gu32_noret : GCCBuiltin<"__atomic_or_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xor_gu32_noret : GCCBuiltin<"__atomic_xor_gu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
|
||||
/// Intrinsics for atomic instructions with a return value
|
||||
/// Signed 32 bit integer atomics for global address space
|
||||
def int_AMDIL_atomic_add_gi32 : GCCBuiltin<"__atomic_add_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_sub_gi32 : GCCBuiltin<"__atomic_sub_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_rsub_gi32 : GCCBuiltin<"__atomic_rsub_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_gi32 : GCCBuiltin<"__atomic_xchg_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_inc_gi32 : GCCBuiltin<"__atomic_inc_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_dec_gi32 : GCCBuiltin<"__atomic_dec_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_cmpxchg_gi32 : GCCBuiltin<"__atomic_cmpxchg_gi32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_min_gi32 : GCCBuiltin<"__atomic_min_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_max_gi32 : GCCBuiltin<"__atomic_max_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_and_gi32 : GCCBuiltin<"__atomic_and_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_or_gi32 : GCCBuiltin<"__atomic_or_gi32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xor_gi32 : GCCBuiltin<"__atomic_xor_gi32">,
|
||||
BinaryAtomicInt;
|
||||
|
||||
/// 32 bit float atomics required by OpenCL
|
||||
def int_AMDIL_atomic_xchg_gf32 : GCCBuiltin<"__atomic_xchg_gf32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_gf32_noret : GCCBuiltin<"__atomic_xchg_gf32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
/// Unsigned 32 bit integer atomics for global address space
|
||||
def int_AMDIL_atomic_add_gu32 : GCCBuiltin<"__atomic_add_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_sub_gu32 : GCCBuiltin<"__atomic_sub_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_rsub_gu32 : GCCBuiltin<"__atomic_rsub_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_gu32 : GCCBuiltin<"__atomic_xchg_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_inc_gu32 : GCCBuiltin<"__atomic_inc_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_dec_gu32 : GCCBuiltin<"__atomic_dec_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_cmpxchg_gu32 : GCCBuiltin<"__atomic_cmpxchg_gu32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_min_gu32 : GCCBuiltin<"__atomic_min_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_max_gu32 : GCCBuiltin<"__atomic_max_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_and_gu32 : GCCBuiltin<"__atomic_and_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_or_gu32 : GCCBuiltin<"__atomic_or_gu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xor_gu32 : GCCBuiltin<"__atomic_xor_gu32">,
|
||||
BinaryAtomicInt;
|
||||
|
||||
|
||||
/// Intrinsics for atomic instructions with no return value
|
||||
/// Signed 32 bit integer atomics for local address space
|
||||
def int_AMDIL_atomic_add_li32_noret : GCCBuiltin<"__atomic_add_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_sub_li32_noret : GCCBuiltin<"__atomic_sub_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_rsub_li32_noret : GCCBuiltin<"__atomic_rsub_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xchg_li32_noret : GCCBuiltin<"__atomic_xchg_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_inc_li32_noret : GCCBuiltin<"__atomic_inc_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_dec_li32_noret : GCCBuiltin<"__atomic_dec_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_cmpxchg_li32_noret : GCCBuiltin<"__atomic_cmpxchg_li32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_min_li32_noret : GCCBuiltin<"__atomic_min_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_max_li32_noret : GCCBuiltin<"__atomic_max_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_and_li32_noret : GCCBuiltin<"__atomic_and_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_or_li32_noret : GCCBuiltin<"__atomic_or_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_mskor_li32_noret : GCCBuiltin<"__atomic_mskor_li32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xor_li32_noret : GCCBuiltin<"__atomic_xor_li32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
/// Signed 32 bit integer atomics for region address space
|
||||
def int_AMDIL_atomic_add_ri32_noret : GCCBuiltin<"__atomic_add_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_sub_ri32_noret : GCCBuiltin<"__atomic_sub_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_rsub_ri32_noret : GCCBuiltin<"__atomic_rsub_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xchg_ri32_noret : GCCBuiltin<"__atomic_xchg_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_inc_ri32_noret : GCCBuiltin<"__atomic_inc_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_dec_ri32_noret : GCCBuiltin<"__atomic_dec_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_cmpxchg_ri32_noret : GCCBuiltin<"__atomic_cmpxchg_ri32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_min_ri32_noret : GCCBuiltin<"__atomic_min_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_max_ri32_noret : GCCBuiltin<"__atomic_max_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_and_ri32_noret : GCCBuiltin<"__atomic_and_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_or_ri32_noret : GCCBuiltin<"__atomic_or_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_mskor_ri32_noret : GCCBuiltin<"__atomic_mskor_ri32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xor_ri32_noret : GCCBuiltin<"__atomic_xor_ri32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
|
||||
|
||||
/// Unsigned 32 bit integer atomics for local address space
|
||||
def int_AMDIL_atomic_add_lu32_noret : GCCBuiltin<"__atomic_add_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_sub_lu32_noret : GCCBuiltin<"__atomic_sub_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_rsub_lu32_noret : GCCBuiltin<"__atomic_rsub_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xchg_lu32_noret : GCCBuiltin<"__atomic_xchg_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_inc_lu32_noret : GCCBuiltin<"__atomic_inc_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_dec_lu32_noret : GCCBuiltin<"__atomic_dec_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_cmpxchg_lu32_noret : GCCBuiltin<"__atomic_cmpxchg_lu32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_min_lu32_noret : GCCBuiltin<"__atomic_min_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_max_lu32_noret : GCCBuiltin<"__atomic_max_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_and_lu32_noret : GCCBuiltin<"__atomic_and_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_or_lu32_noret : GCCBuiltin<"__atomic_or_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_mskor_lu32_noret : GCCBuiltin<"__atomic_mskor_lu32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xor_lu32_noret : GCCBuiltin<"__atomic_xor_lu32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
/// Unsigned 32 bit integer atomics for region address space
|
||||
def int_AMDIL_atomic_add_ru32_noret : GCCBuiltin<"__atomic_add_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_sub_ru32_noret : GCCBuiltin<"__atomic_sub_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_rsub_ru32_noret : GCCBuiltin<"__atomic_rsub_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xchg_ru32_noret : GCCBuiltin<"__atomic_xchg_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_inc_ru32_noret : GCCBuiltin<"__atomic_inc_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_dec_ru32_noret : GCCBuiltin<"__atomic_dec_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_cmpxchg_ru32_noret : GCCBuiltin<"__atomic_cmpxchg_ru32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_min_ru32_noret : GCCBuiltin<"__atomic_min_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_max_ru32_noret : GCCBuiltin<"__atomic_max_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_and_ru32_noret : GCCBuiltin<"__atomic_and_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_or_ru32_noret : GCCBuiltin<"__atomic_or_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_mskor_ru32_noret : GCCBuiltin<"__atomic_mskor_ru32_noret">,
|
||||
TernaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xor_ru32_noret : GCCBuiltin<"__atomic_xor_ru32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">,
|
||||
VoidIntLong;
|
||||
|
||||
def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">,
|
||||
VoidIntInt;
|
||||
|
||||
def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">,
|
||||
VoidIntInt;
|
||||
|
||||
|
||||
/// Intrinsics for atomic instructions with a return value
|
||||
/// Signed 32 bit integer atomics for local address space
|
||||
def int_AMDIL_atomic_add_li32 : GCCBuiltin<"__atomic_add_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_sub_li32 : GCCBuiltin<"__atomic_sub_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_rsub_li32 : GCCBuiltin<"__atomic_rsub_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_li32 : GCCBuiltin<"__atomic_xchg_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_inc_li32 : GCCBuiltin<"__atomic_inc_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_dec_li32 : GCCBuiltin<"__atomic_dec_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_cmpxchg_li32 : GCCBuiltin<"__atomic_cmpxchg_li32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_min_li32 : GCCBuiltin<"__atomic_min_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_max_li32 : GCCBuiltin<"__atomic_max_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_and_li32 : GCCBuiltin<"__atomic_and_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_or_li32 : GCCBuiltin<"__atomic_or_li32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_mskor_li32 : GCCBuiltin<"__atomic_mskor_li32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_xor_li32 : GCCBuiltin<"__atomic_xor_li32">,
|
||||
BinaryAtomicInt;
|
||||
|
||||
/// Signed 32 bit integer atomics for region address space
|
||||
def int_AMDIL_atomic_add_ri32 : GCCBuiltin<"__atomic_add_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_sub_ri32 : GCCBuiltin<"__atomic_sub_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_rsub_ri32 : GCCBuiltin<"__atomic_rsub_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_ri32 : GCCBuiltin<"__atomic_xchg_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_inc_ri32 : GCCBuiltin<"__atomic_inc_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_dec_ri32 : GCCBuiltin<"__atomic_dec_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_cmpxchg_ri32 : GCCBuiltin<"__atomic_cmpxchg_ri32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_min_ri32 : GCCBuiltin<"__atomic_min_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_max_ri32 : GCCBuiltin<"__atomic_max_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_and_ri32 : GCCBuiltin<"__atomic_and_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_or_ri32 : GCCBuiltin<"__atomic_or_ri32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_mskor_ri32 : GCCBuiltin<"__atomic_mskor_ri32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_xor_ri32 : GCCBuiltin<"__atomic_xor_ri32">,
|
||||
BinaryAtomicInt;
|
||||
|
||||
/// 32 bit float atomics required by OpenCL
|
||||
def int_AMDIL_atomic_xchg_lf32 : GCCBuiltin<"__atomic_xchg_lf32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_lf32_noret : GCCBuiltin<"__atomic_xchg_lf32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
def int_AMDIL_atomic_xchg_rf32 : GCCBuiltin<"__atomic_xchg_rf32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_rf32_noret : GCCBuiltin<"__atomic_xchg_rf32_noret">,
|
||||
BinaryAtomicIntNoRet;
|
||||
|
||||
/// Unsigned 32 bit integer atomics for local address space
|
||||
def int_AMDIL_atomic_add_lu32 : GCCBuiltin<"__atomic_add_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_sub_lu32 : GCCBuiltin<"__atomic_sub_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_rsub_lu32 : GCCBuiltin<"__atomic_rsub_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_lu32 : GCCBuiltin<"__atomic_xchg_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_inc_lu32 : GCCBuiltin<"__atomic_inc_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_dec_lu32 : GCCBuiltin<"__atomic_dec_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_cmpxchg_lu32 : GCCBuiltin<"__atomic_cmpxchg_lu32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_min_lu32 : GCCBuiltin<"__atomic_min_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_max_lu32 : GCCBuiltin<"__atomic_max_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_and_lu32 : GCCBuiltin<"__atomic_and_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_or_lu32 : GCCBuiltin<"__atomic_or_lu32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_mskor_lu32 : GCCBuiltin<"__atomic_mskor_lu32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_xor_lu32 : GCCBuiltin<"__atomic_xor_lu32">,
|
||||
BinaryAtomicInt;
|
||||
|
||||
/// Unsigned 32 bit integer atomics for region address space
|
||||
def int_AMDIL_atomic_add_ru32 : GCCBuiltin<"__atomic_add_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_sub_ru32 : GCCBuiltin<"__atomic_sub_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_rsub_ru32 : GCCBuiltin<"__atomic_rsub_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_xchg_ru32 : GCCBuiltin<"__atomic_xchg_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_inc_ru32 : GCCBuiltin<"__atomic_inc_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_dec_ru32 : GCCBuiltin<"__atomic_dec_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_cmpxchg_ru32 : GCCBuiltin<"__atomic_cmpxchg_ru32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_min_ru32 : GCCBuiltin<"__atomic_min_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_max_ru32 : GCCBuiltin<"__atomic_max_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_and_ru32 : GCCBuiltin<"__atomic_and_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_or_ru32 : GCCBuiltin<"__atomic_or_ru32">,
|
||||
BinaryAtomicInt;
|
||||
def int_AMDIL_atomic_mskor_ru32 : GCCBuiltin<"__atomic_mskor_ru32">,
|
||||
TernaryAtomicInt;
|
||||
def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">,
|
||||
BinaryAtomicInt;
|
||||
|
||||
/// Semaphore signal/wait/init
|
||||
def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
|
||||
def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">,
|
||||
Intrinsic<[], [llvm_ptr_ty]>;
|
||||
def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">,
|
||||
Intrinsic<[], [llvm_ptr_ty]>;
|
||||
def int_AMDIL_semaphore_size : GCCBuiltin<"__amdil_max_semaphore_size">,
|
||||
Intrinsic<[llvm_i32_ty], []>;
|
||||
}
|
|
@ -1,95 +0,0 @@
|
|||
//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
// Multiclass that handles branch instructions
|
||||
multiclass BranchConditional<SDNode Op> {
|
||||
def _i32 : ILFormat<IL_OP_IFC, (outs),
|
||||
(ins brtarget:$target, GPRI32:$src0),
|
||||
"; i32 Pseudo branch instruction",
|
||||
[(Op bb:$target, GPRI32:$src0)]>;
|
||||
def _f32 : ILFormat<IL_OP_IFC, (outs),
|
||||
(ins brtarget:$target, GPRF32:$src0),
|
||||
"; f32 Pseudo branch instruction",
|
||||
[(Op bb:$target, GPRF32:$src0)]>;
|
||||
}
|
||||
|
||||
// Multiclass that handles memory store operations
|
||||
multiclass GTRUNCSTORE<string asm> {
|
||||
def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(global_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(global_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
}
|
||||
|
||||
// Multiclass that handles memory store operations
|
||||
multiclass LTRUNCSTORE<string asm> {
|
||||
def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(local_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(local_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
}
|
||||
|
||||
// Multiclass that handles memory store operations
|
||||
multiclass PTRUNCSTORE<string asm> {
|
||||
def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(private_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(private_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
}
|
||||
|
||||
// Multiclass that handles memory store operations
|
||||
multiclass RTRUNCSTORE<string asm> {
|
||||
def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(region_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(region_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
|
||||
}
|
||||
|
||||
|
||||
// Multiclass that handles memory store operations
|
||||
multiclass STORE<string asm, PatFrag OpNode> {
|
||||
def _i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(OpNode GPRI32:$val, ADDR:$ptr)]>;
|
||||
def _f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEMI32:$ptr),
|
||||
!strconcat(asm, " $val $ptr"),
|
||||
[(OpNode GPRF32:$val, ADDR:$ptr)]>;
|
||||
}
|
||||
|
||||
// Multiclass that handles load operations
|
||||
multiclass LOAD<string asm, PatFrag OpNode> {
|
||||
def _i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEMI32:$ptr),
|
||||
!strconcat(asm, " $dst $ptr"),
|
||||
[(set GPRI32:$dst, (OpNode ADDR:$ptr))]>;
|
||||
def _f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEMI32:$ptr),
|
||||
!strconcat(asm, " $dst $ptr"),
|
||||
[(set GPRF32:$dst, (OpNode ADDR:$ptr))]>;
|
||||
}
|
||||
|
||||
// Only scalar types should generate flow control
|
||||
multiclass BranchInstr<ILOpCode opc> {
|
||||
def _i32 : UnaryOpNoRet<opc, (outs), (ins GPRI32:$src),
|
||||
!strconcat(opc.Text, " $src"), []>;
|
||||
def _f32 : UnaryOpNoRet<opc, (outs), (ins GPRF32:$src),
|
||||
!strconcat(opc.Text, " $src"), []>;
|
||||
}
|
||||
// Only scalar types should generate flow control
|
||||
multiclass BranchInstr2<ILOpCode opc> {
|
||||
def _i32 : BinaryOpNoRet<opc, (outs), (ins GPRI32:$src0, GPRI32:$src1),
|
||||
!strconcat(opc.Text, " $src0, $src1"), []>;
|
||||
def _f32 : BinaryOpNoRet<opc, (outs), (ins GPRF32:$src0, GPRF32:$src1),
|
||||
!strconcat(opc.Text, " $src0, $src1"), []>;
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST)
|
||||
: AMDILEvergreenDevice(ST)
|
||||
{
|
||||
std::string name = ST->getDeviceName();
|
||||
if (name == "caicos") {
|
||||
mDeviceFlag = OCL_DEVICE_CAICOS;
|
||||
} else if (name == "turks") {
|
||||
mDeviceFlag = OCL_DEVICE_TURKS;
|
||||
} else if (name == "cayman") {
|
||||
mDeviceFlag = OCL_DEVICE_CAYMAN;
|
||||
} else {
|
||||
mDeviceFlag = OCL_DEVICE_BARTS;
|
||||
}
|
||||
}
|
||||
AMDILNIDevice::~AMDILNIDevice()
|
||||
{
|
||||
}
|
||||
|
||||
size_t
|
||||
AMDILNIDevice::getMaxLDSSize() const
|
||||
{
|
||||
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_900;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDILNIDevice::getGeneration() const
|
||||
{
|
||||
return AMDILDeviceInfo::HD6XXX;
|
||||
}
|
||||
|
||||
|
||||
AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST)
|
||||
: AMDILNIDevice(ST)
|
||||
{
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDILCaymanDevice::~AMDILCaymanDevice()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
AMDILCaymanDevice::setCaps()
|
||||
{
|
||||
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
|
||||
mHWBits.set(AMDILDeviceInfo::DoubleOps);
|
||||
mHWBits.set(AMDILDeviceInfo::FMA);
|
||||
}
|
||||
mHWBits.set(AMDILDeviceInfo::Signed24BitOps);
|
||||
mSWBits.reset(AMDILDeviceInfo::Signed24BitOps);
|
||||
mSWBits.set(AMDILDeviceInfo::ArenaSegment);
|
||||
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===---------------------------------------------------------------------===//
|
||||
#ifndef _AMDILNIDEVICE_H_
|
||||
#define _AMDILNIDEVICE_H_
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDILSubtarget;
|
||||
//===---------------------------------------------------------------------===//
|
||||
// NI generation of devices and their respective sub classes
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
// The AMDILNIDevice is the base class for all Northern Island series of
|
||||
// cards. It is very similiar to the AMDILEvergreenDevice, with the major
|
||||
// exception being differences in wavefront size and hardware capabilities. The
|
||||
// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||
// integer operations
|
||||
|
||||
class AMDILNIDevice : public AMDILEvergreenDevice {
|
||||
public:
|
||||
AMDILNIDevice(AMDILSubtarget*);
|
||||
virtual ~AMDILNIDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
protected:
|
||||
}; // AMDILNIDevice
|
||||
|
||||
// Just as the AMDILCypressDevice is the double capable version of the
|
||||
// AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of
|
||||
// the AMDILNIDevice. The other major difference that is not as useful from
|
||||
// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
|
||||
// NI family is a 5 wide.
|
||||
|
||||
class AMDILCaymanDevice: public AMDILNIDevice {
|
||||
public:
|
||||
AMDILCaymanDevice(AMDILSubtarget*);
|
||||
virtual ~AMDILCaymanDevice();
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDILCaymanDevice
|
||||
|
||||
static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800;
|
||||
} // namespace llvm
|
||||
#endif // _AMDILNIDEVICE_H_
|
|
@ -1,47 +0,0 @@
|
|||
//===- AMDILNodes.td - AMD IL nodes ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Flow Control DAG Nodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
def IL_brcond : SDNode<"AMDILISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Comparison DAG Nodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
def IL_cmp : SDNode<"AMDILISD::CMP", SDTIL_Cmp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Call/Return DAG Nodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
def IL_call : SDNode<"AMDILISD::CALL", SDTIL_Call,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
def IL_retflag : SDNode<"AMDILISD::RET_FLAG", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue]>;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instructions
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Floating point math functions
|
||||
def IL_cmov_logical : SDNode<"AMDILISD::CMOVLOG", SDTIL_GenTernaryOp>;
|
||||
def IL_div_inf : SDNode<"AMDILISD::DIV_INF", SDTIL_GenBinaryOp>;
|
||||
def IL_mad : SDNode<"AMDILISD::MAD", SDTIL_GenTernaryOp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Integer functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
def IL_umul : SDNode<"AMDILISD::UMUL" , SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
def IL_vbuild : SDNode<"AMDILISD::VBUILD", SDTIL_GenVecBuild,
|
||||
[]>;
|
|
@ -1,32 +0,0 @@
|
|||
//===- AMDILOperands.td - AMD IL Operands ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom memory operand
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def MEMI32 : Operand<i32> {
|
||||
let PrintMethod = "printMemOperand";
|
||||
let MIOperandInfo = (ops GPRI32, GPRI32);
|
||||
}
|
||||
|
||||
// Call target types
|
||||
def calltarget : Operand<i32>;
|
||||
def brtarget : Operand<OtherVT>;
|
||||
|
||||
// def v2i8imm : Operand<v2i8>;
|
||||
// def v4i8imm : Operand<v4i8>;
|
||||
// def v2i16imm : Operand<v2i16>;
|
||||
// def v4i16imm : Operand<v4i16>;
|
||||
// def v2i32imm : Operand<v2i32>;
|
||||
// def v4i32imm : Operand<v4i32>;
|
||||
// def v2i64imm : Operand<v2i64>;
|
||||
// def v2f32imm : Operand<v2f32>;
|
||||
// def v4f32imm : Operand<v4f32>;
|
||||
// def v2f64imm : Operand<v2f64>;
|
||||
|
|
@ -1,504 +0,0 @@
|
|||
//===- AMDILPatterns.td - AMDIL Target Patterns------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Store pattern fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
def truncstorei64 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
def truncstorev2i8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i8;
|
||||
}]>;
|
||||
def truncstorev2i16 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16;
|
||||
}]>;
|
||||
def truncstorev2i32 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i32;
|
||||
}]>;
|
||||
def truncstorev2i64 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i64;
|
||||
}]>;
|
||||
def truncstorev2f32 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f32;
|
||||
}]>;
|
||||
def truncstorev2f64 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f64;
|
||||
}]>;
|
||||
def truncstorev4i8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8;
|
||||
}]>;
|
||||
def truncstorev4i16 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i16;
|
||||
}]>;
|
||||
def truncstorev4i32 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i32;
|
||||
}]>;
|
||||
def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
|
||||
}]>;
|
||||
|
||||
def global_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei8 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei16 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei32 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei64 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref32 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref64 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def global_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei8 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei16 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei32 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei64 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref32 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref64 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def private_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def local_trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei8 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei16 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei32 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei64 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref32 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref64 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def local_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def region_trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei8 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei16 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei32 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei64 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref32 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstoref64 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
def region_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load pattern fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Global address space loads
|
||||
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def global_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def global_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def global_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
// Private address space loads
|
||||
def private_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isPrivateLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def private_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||
return isPrivateLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def private_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||
return isPrivateLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def private_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||
return isPrivateLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
// Local address space loads
|
||||
def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def local_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def local_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def local_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
// Region address space loads
|
||||
def region_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isRegionLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def region_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||
return isRegionLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def region_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||
return isRegionLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def region_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||
return isRegionLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
// Constant address space loads
|
||||
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
def constant_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
def constant_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
def constant_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
// Constant pool loads
|
||||
def cp_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def cp_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def cp_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
def cp_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Complex addressing mode patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
|
||||
def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
|
||||
def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
|
||||
def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Conditional Instruction Pattern Leafs
|
||||
//===----------------------------------------------------------------------===//
|
||||
class IL_CC_Op<int N> : PatLeaf<(i32 N)>;
|
||||
def IL_CC_D_EQ : IL_CC_Op<0>;
|
||||
def IL_CC_D_GE : IL_CC_Op<1>;
|
||||
def IL_CC_D_LT : IL_CC_Op<2>;
|
||||
def IL_CC_D_NE : IL_CC_Op<3>;
|
||||
def IL_CC_F_EQ : IL_CC_Op<4>;
|
||||
def IL_CC_F_GE : IL_CC_Op<5>;
|
||||
def IL_CC_F_LT : IL_CC_Op<6>;
|
||||
def IL_CC_F_NE : IL_CC_Op<7>;
|
||||
def IL_CC_I_EQ : IL_CC_Op<8>;
|
||||
def IL_CC_I_GE : IL_CC_Op<9>;
|
||||
def IL_CC_I_LT : IL_CC_Op<10>;
|
||||
def IL_CC_I_NE : IL_CC_Op<11>;
|
||||
def IL_CC_U_GE : IL_CC_Op<12>;
|
||||
def IL_CC_U_LT : IL_CC_Op<13>;
|
||||
// Pseudo IL comparison instructions that aren't natively supported
|
||||
def IL_CC_F_GT : IL_CC_Op<14>;
|
||||
def IL_CC_U_GT : IL_CC_Op<15>;
|
||||
def IL_CC_I_GT : IL_CC_Op<16>;
|
||||
def IL_CC_D_GT : IL_CC_Op<17>;
|
||||
def IL_CC_F_LE : IL_CC_Op<18>;
|
||||
def IL_CC_U_LE : IL_CC_Op<19>;
|
||||
def IL_CC_I_LE : IL_CC_Op<20>;
|
||||
def IL_CC_D_LE : IL_CC_Op<21>;
|
||||
def IL_CC_F_UNE : IL_CC_Op<22>;
|
||||
def IL_CC_F_UEQ : IL_CC_Op<23>;
|
||||
def IL_CC_F_ULT : IL_CC_Op<24>;
|
||||
def IL_CC_F_UGT : IL_CC_Op<25>;
|
||||
def IL_CC_F_ULE : IL_CC_Op<26>;
|
||||
def IL_CC_F_UGE : IL_CC_Op<27>;
|
||||
def IL_CC_F_ONE : IL_CC_Op<28>;
|
||||
def IL_CC_F_OEQ : IL_CC_Op<29>;
|
||||
def IL_CC_F_OLT : IL_CC_Op<30>;
|
||||
def IL_CC_F_OGT : IL_CC_Op<31>;
|
||||
def IL_CC_F_OLE : IL_CC_Op<32>;
|
||||
def IL_CC_F_OGE : IL_CC_Op<33>;
|
||||
def IL_CC_D_UNE : IL_CC_Op<34>;
|
||||
def IL_CC_D_UEQ : IL_CC_Op<35>;
|
||||
def IL_CC_D_ULT : IL_CC_Op<36>;
|
||||
def IL_CC_D_UGT : IL_CC_Op<37>;
|
||||
def IL_CC_D_ULE : IL_CC_Op<38>;
|
||||
def IL_CC_D_UGE : IL_CC_Op<39>;
|
||||
def IL_CC_D_ONE : IL_CC_Op<30>;
|
||||
def IL_CC_D_OEQ : IL_CC_Op<41>;
|
||||
def IL_CC_D_OLT : IL_CC_Op<42>;
|
||||
def IL_CC_D_OGT : IL_CC_Op<43>;
|
||||
def IL_CC_D_OLE : IL_CC_Op<44>;
|
||||
def IL_CC_D_OGE : IL_CC_Op<45>;
|
||||
def IL_CC_U_EQ : IL_CC_Op<46>;
|
||||
def IL_CC_U_NE : IL_CC_Op<47>;
|
||||
def IL_CC_F_O : IL_CC_Op<48>;
|
||||
def IL_CC_D_O : IL_CC_Op<49>;
|
||||
def IL_CC_F_UO : IL_CC_Op<50>;
|
||||
def IL_CC_D_UO : IL_CC_Op<51>;
|
||||
def IL_CC_L_LE : IL_CC_Op<52>;
|
||||
def IL_CC_L_GE : IL_CC_Op<53>;
|
||||
def IL_CC_L_EQ : IL_CC_Op<54>;
|
||||
def IL_CC_L_NE : IL_CC_Op<55>;
|
||||
def IL_CC_L_LT : IL_CC_Op<56>;
|
||||
def IL_CC_L_GT : IL_CC_Op<57>;
|
||||
def IL_CC_UL_LE : IL_CC_Op<58>;
|
||||
def IL_CC_UL_GE : IL_CC_Op<59>;
|
||||
def IL_CC_UL_EQ : IL_CC_Op<60>;
|
||||
def IL_CC_UL_NE : IL_CC_Op<61>;
|
||||
def IL_CC_UL_LT : IL_CC_Op<62>;
|
||||
def IL_CC_UL_GT : IL_CC_Op<63>;
|
File diff suppressed because it is too large
Load Diff
|
@ -1,174 +0,0 @@
|
|||
//===- AMDILProfiles.td - AMD IL Profiles ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
// These are used for custom selection dag type profiles
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom Selection DAG Type Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SDTCisDP - The specified operand has double type
|
||||
// Tablegen needs to be hacked to get this constraint to work
|
||||
//class SDTCisDP<int OpNum> : SDTypeConstraint<OpNum>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Generic Profile Types
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def SDTIL_GenUnaryOp : SDTypeProfile<1, 1, [
|
||||
SDTCisSameAs<0, 1>
|
||||
]>;
|
||||
def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
|
||||
]>;
|
||||
def SDTIL_GenCMovLog : SDTypeProfile<1, 3, [
|
||||
SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisInt<1>
|
||||
]>;
|
||||
def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
|
||||
SDTCisEltOfVec<1, 0>
|
||||
]>;
|
||||
|
||||
def SDTIL_GenVecExtract : SDTypeProfile<1, 2, [
|
||||
SDTCisEltOfVec<0, 1>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
|
||||
def SDTIL_GenVecInsert : SDTypeProfile<1, 4, [
|
||||
SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>,
|
||||
SDTCisVT<3, i32>, SDTCisVT<4, i32>
|
||||
]>;
|
||||
|
||||
def SDTIL_GenVecShuffle : SDTypeProfile <1, 2, [
|
||||
SDTCisSameAs<0, 1>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
|
||||
def SDTIL_GenVecConcat : SDTypeProfile <1, 2, [
|
||||
SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Conversion Profile Types
|
||||
//===----------------------------------------------------------------------===//
|
||||
def SDTIL_DPToFPOp : SDTypeProfile<1, 1, [
|
||||
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>
|
||||
]>; // d2f
|
||||
|
||||
def SDTIL_AnyToInt : SDTypeProfile<1, 1, [
|
||||
SDTCisInt<0>
|
||||
]>;
|
||||
def SDTIL_IntToAny : SDTypeProfile<1, 1, [
|
||||
SDTCisInt<1>
|
||||
]>;
|
||||
def SDTIL_GenBitConv : SDTypeProfile<1, 1, []>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Scalar Profile Types
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Add instruction pattern to handle offsets of memory operationns
|
||||
def SDTIL_AddAddrri: SDTypeProfile<1, 2, [
|
||||
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisSameAs<0, 2>
|
||||
]>;
|
||||
def SDTIL_AddAddrir : SDTypeProfile<1, 2, [
|
||||
SDTCisInt<0>, SDTCisPtrTy<2>, SDTCisSameAs<0, 1>
|
||||
]>;
|
||||
|
||||
def SDTIL_LCreate : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
def SDTIL_LCreate2 : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, v2i64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
def SDTIL_LComp : SDTypeProfile<1, 1, [
|
||||
SDTCisVT<0, i32>, SDTCisVT<1, i64>
|
||||
]>;
|
||||
def SDTIL_LComp2 : SDTypeProfile<1, 1, [
|
||||
SDTCisVT<0, v2i32>, SDTCisVT<1, v2i64>
|
||||
]>;
|
||||
def SDTIL_DCreate : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
def SDTIL_DComp : SDTypeProfile<1, 1, [
|
||||
SDTCisVT<0, i32>, SDTCisVT<1, f64>
|
||||
]>;
|
||||
def SDTIL_DCreate2 : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, v2f64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
def SDTIL_DComp2 : SDTypeProfile<1, 1, [
|
||||
SDTCisVT<0, v2i32>, SDTCisVT<1, v2f64>
|
||||
]>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Flow Control Profile Types
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Profile for Normal Call
|
||||
def SDTIL_Call : SDTypeProfile<0, 1, [
|
||||
SDTCisVT<0, i32>
|
||||
]>;
|
||||
// Branch instruction where second and third are basic blocks
|
||||
def SDTIL_BRCond : SDTypeProfile<0, 2, [
|
||||
SDTCisVT<0, OtherVT>
|
||||
]>;
|
||||
// Comparison instruction
|
||||
def SDTIL_Cmp : SDTypeProfile<1, 3, [
|
||||
SDTCisSameAs<0, 2>, SDTCisSameAs<2,3>, SDTCisVT<1, i32>
|
||||
]>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Call Sequence Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
def SDTIL_CallSeqStart : SDCallSeqStart< [
|
||||
SDTCisVT<0, i32>
|
||||
]>;
|
||||
def SDTIL_CallSeqEnd : SDCallSeqEnd< [
|
||||
SDTCisVT<0, i32>, SDTCisVT<1, i32>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Image Operation Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
def SDTIL_ImageRead : SDTypeProfile<1, 3,
|
||||
[SDTCisVT<0, v4i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, v4f32>]>;
|
||||
def SDTIL_ImageWrite : SDTypeProfile<0, 3,
|
||||
[SDTCisPtrTy<0>, SDTCisVT<1, v2i32>, SDTCisVT<2, v4i32>]>;
|
||||
def SDTIL_ImageWrite3D : SDTypeProfile<0, 3,
|
||||
[SDTCisPtrTy<0>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>]>;
|
||||
def SDTIL_ImageInfo : SDTypeProfile<1, 1,
|
||||
[SDTCisVT<0, v4i32>, SDTCisPtrTy<1>]>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Atomic Operation Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
def SDTIL_UniAtomNoRet : SDTypeProfile<0, 2, [
|
||||
SDTCisPtrTy<0>, SDTCisVT<1, i32>
|
||||
]>;
|
||||
def SDTIL_BinAtomNoRet : SDTypeProfile<0, 3, [
|
||||
SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
def SDTIL_TriAtomNoRet : SDTypeProfile<0, 4, [
|
||||
SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>
|
||||
]>;
|
||||
def SDTIL_UniAtom : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
def SDTIL_BinAtom : SDTypeProfile<1, 3, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>
|
||||
]>;
|
||||
def SDTIL_TriAtom : SDTypeProfile<1, 4, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>,
|
||||
SDTCisVT<3, i32>, SDTCisVT<4, i32>
|
||||
]>;
|
||||
|
||||
def SDTIL_BinAtomFloat : SDTypeProfile<1, 3, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, f32>, SDTCisVT<3, f32>
|
||||
]>;
|
||||
def SDTIL_BinAtomNoRetFloat : SDTypeProfile<0, 3, [
|
||||
SDTCisPtrTy<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>
|
||||
]>;
|
||||
|
||||
def SDTIL_Append : SDTypeProfile<1, 1, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>
|
||||
]>;
|
|
@ -1,162 +0,0 @@
|
|||
//===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the AMDIL implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDILRegisterInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILInstrInfo.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDILRegisterInfo::AMDILRegisterInfo(TargetMachine &tm,
|
||||
const TargetInstrInfo &tii)
|
||||
: AMDILGenRegisterInfo(0), // RA???
|
||||
TM(tm), TII(tii)
|
||||
{
|
||||
baseOffset = 0;
|
||||
nextFuncOffset = 0;
|
||||
}
|
||||
|
||||
const uint16_t*
|
||||
AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
|
||||
{
|
||||
static const uint16_t CalleeSavedRegs[] = { 0 };
|
||||
// TODO: Does IL need to actually have any callee saved regs?
|
||||
// I don't think we do since we can just use sequential registers
|
||||
// Maybe this would be easier if every function call was inlined first
|
||||
// and then there would be no callee issues to deal with
|
||||
//TODO(getCalleeSavedRegs);
|
||||
return CalleeSavedRegs;
|
||||
}
|
||||
|
||||
BitVector
|
||||
AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const
|
||||
{
|
||||
BitVector Reserved(getNumRegs());
|
||||
// We reserve the first getNumRegs() registers as they are the ones passed
|
||||
// in live-in/live-out
|
||||
// and therefor cannot be killed by the scheduler. This works around a bug
|
||||
// discovered
|
||||
// that was causing the linearscan register allocator to kill registers
|
||||
// inside of the
|
||||
// function that were also passed as LiveIn registers.
|
||||
for (unsigned int x = 0, y = 256; x < y; ++x) {
|
||||
Reserved.set(x);
|
||||
}
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
BitVector
|
||||
AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF,
|
||||
const TargetRegisterClass *RC = NULL) const
|
||||
{
|
||||
BitVector Allocatable(getNumRegs());
|
||||
Allocatable.clear();
|
||||
return Allocatable;
|
||||
}
|
||||
|
||||
const TargetRegisterClass* const*
|
||||
AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
|
||||
{
|
||||
static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
|
||||
// TODO: Keep in sync with getCalleeSavedRegs
|
||||
//TODO(getCalleeSavedRegClasses);
|
||||
return CalleeSavedRegClasses;
|
||||
}
|
||||
void
|
||||
AMDILRegisterInfo::eliminateCallFramePseudoInstr(
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const
|
||||
{
|
||||
MBB.erase(I);
|
||||
}
|
||||
|
||||
// For each frame index we find, we store the offset in the stack which is
|
||||
// being pushed back into the global buffer. The offset into the stack where
|
||||
// the value is stored is copied into a new register and the frame index is
|
||||
// then replaced with that register.
|
||||
void
|
||||
AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj,
|
||||
RegScavenger *RS) const
|
||||
{
|
||||
assert(!"Implement");
|
||||
}
|
||||
|
||||
void
|
||||
AMDILRegisterInfo::processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF) const
|
||||
{
|
||||
//TODO(processFunctionBeforeFrameFinalized);
|
||||
// Here we keep track of the amount of stack that the current function
|
||||
// uses so
|
||||
// that we can set the offset to the end of the stack and any other
|
||||
// function call
|
||||
// will not overwrite any stack variables.
|
||||
// baseOffset = nextFuncOffset;
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) {
|
||||
int64_t size = MFI->getObjectSize(x);
|
||||
if (!(size % 4) && size > 1) {
|
||||
nextFuncOffset += size;
|
||||
} else {
|
||||
nextFuncOffset += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
unsigned int
|
||||
AMDILRegisterInfo::getRARegister() const
|
||||
{
|
||||
return AMDGPU::RA;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const
|
||||
{
|
||||
return AMDGPU::FP;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
AMDILRegisterInfo::getEHExceptionRegister() const
|
||||
{
|
||||
assert(0 && "What is the exception register");
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
AMDILRegisterInfo::getEHHandlerRegister() const
|
||||
{
|
||||
assert(0 && "What is the exception handler register");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t
|
||||
AMDILRegisterInfo::getStackSize() const
|
||||
{
|
||||
return nextFuncOffset - baseOffset;
|
||||
}
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
//===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the AMDIL implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDILREGISTERINFO_H_
|
||||
#define AMDILREGISTERINFO_H_
|
||||
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
#define GET_REGINFO_HEADER
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
// See header file for explanation
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
|
||||
class TargetInstrInfo;
|
||||
class Type;
|
||||
|
||||
/// DWARFFlavour - Flavour of dwarf regnumbers
|
||||
///
|
||||
namespace DWARFFlavour {
|
||||
enum {
|
||||
AMDIL_Generic = 0
|
||||
};
|
||||
}
|
||||
|
||||
struct AMDILRegisterInfo : public AMDILGenRegisterInfo
|
||||
{
|
||||
TargetMachine &TM;
|
||||
const TargetInstrInfo &TII;
|
||||
|
||||
AMDILRegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
|
||||
/// Code Generation virtual methods...
|
||||
const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const;
|
||||
|
||||
const TargetRegisterClass* const*
|
||||
getCalleeSavedRegClasses(
|
||||
const MachineFunction *MF = 0) const;
|
||||
|
||||
BitVector
|
||||
getReservedRegs(const MachineFunction &MF) const;
|
||||
BitVector
|
||||
getAllocatableSet(const MachineFunction &MF,
|
||||
const TargetRegisterClass *RC) const;
|
||||
|
||||
void
|
||||
eliminateCallFramePseudoInstr(
|
||||
MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
void
|
||||
eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, RegScavenger *RS = NULL) const;
|
||||
|
||||
void
|
||||
processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
|
||||
|
||||
// Debug information queries.
|
||||
unsigned int
|
||||
getRARegister() const;
|
||||
|
||||
unsigned int
|
||||
getFrameRegister(const MachineFunction &MF) const;
|
||||
|
||||
// Exception handling queries.
|
||||
unsigned int
|
||||
getEHExceptionRegister() const;
|
||||
unsigned int
|
||||
getEHHandlerRegister() const;
|
||||
|
||||
int64_t
|
||||
getStackSize() const;
|
||||
|
||||
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT)
|
||||
const {
|
||||
return &AMDGPU::GPRI32RegClass;
|
||||
}
|
||||
private:
|
||||
mutable int64_t baseOffset;
|
||||
mutable int64_t nextFuncOffset;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // AMDILREGISTERINFO_H_
|
|
@ -1,110 +0,0 @@
|
|||
//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Declarations that describe the AMDIL register file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDILReg<bits<16> num, string n> : Register<n> {
|
||||
field bits<16> Value;
|
||||
let Value = num;
|
||||
let Namespace = "AMDGPU";
|
||||
}
|
||||
|
||||
// We will start with 8 registers for each class before expanding to more
|
||||
// Since the swizzle is added based on the register class, we can leave it
|
||||
// off here and just specify different registers for different register classes
|
||||
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
|
||||
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
|
||||
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
|
||||
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
|
||||
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
|
||||
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
|
||||
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
|
||||
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
|
||||
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
|
||||
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
|
||||
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
|
||||
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
|
||||
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
|
||||
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
|
||||
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
|
||||
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
|
||||
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
|
||||
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
|
||||
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
|
||||
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
|
||||
|
||||
// All registers between 1000 and 1024 are reserved and cannot be used
|
||||
// unless commented in this section
|
||||
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
|
||||
// r1020 is used to hold the frame index for local arrays
|
||||
// r1019 is used to hold the dynamic stack allocation pointer
|
||||
// r1018 is used as a temporary register for handwritten code
|
||||
// r1017 is used as a temporary register for handwritten code
|
||||
// r1016 is used as a temporary register for load/store code
|
||||
// r1015 is used as a temporary register for data segment offset
|
||||
// r1014 is used as a temporary register for store code
|
||||
// r1013 is used as the section data pointer register
|
||||
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
|
||||
// r1009 is used as the frame pointer register
|
||||
// r999 is used as the mem register.
|
||||
// r998 is used as the return address register.
|
||||
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
|
||||
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
|
||||
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
|
||||
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
|
||||
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
|
||||
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
|
||||
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
|
||||
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
|
||||
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
|
||||
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
|
||||
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
|
||||
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
|
||||
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
|
||||
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
|
||||
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
|
||||
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
|
||||
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
|
||||
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
|
||||
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
|
||||
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
|
||||
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
|
||||
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
|
||||
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
|
||||
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
|
||||
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
|
||||
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
|
||||
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
|
||||
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
|
||||
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
|
||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||
{
|
||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||
let AltOrderSelect = [{
|
||||
return 1;
|
||||
}];
|
||||
}
|
||||
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
|
||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||
{
|
||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||
let AltOrderSelect = [{
|
||||
return 1;
|
||||
}];
|
||||
}
|
||||
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
|
||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||
{
|
||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||
let AltOrderSelect = [{
|
||||
return 1;
|
||||
}];
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILSIDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
|
||||
: AMDILEvergreenDevice(ST)
|
||||
{
|
||||
}
|
||||
AMDILSIDevice::~AMDILSIDevice()
|
||||
{
|
||||
}
|
||||
|
||||
size_t
|
||||
AMDILSIDevice::getMaxLDSSize() const
|
||||
{
|
||||
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_900;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDILSIDevice::getGeneration() const
|
||||
{
|
||||
return AMDILDeviceInfo::HD7XXX;
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDILSIDevice::getDataLayout() const
|
||||
{
|
||||
return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
"-n8:16:32:64");
|
||||
}
|
|
@ -1,45 +0,0 @@
|
|||
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===---------------------------------------------------------------------===//
|
||||
#ifndef _AMDILSIDEVICE_H_
|
||||
#define _AMDILSIDEVICE_H_
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDILSubtarget;
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SI generation of devices and their respective sub classes
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
// The AMDILSIDevice is the base class for all Northern Island series of
|
||||
// cards. It is very similiar to the AMDILEvergreenDevice, with the major
|
||||
// exception being differences in wavefront size and hardware capabilities. The
|
||||
// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||
// integer operations
|
||||
|
||||
class AMDILSIDevice : public AMDILEvergreenDevice {
|
||||
public:
|
||||
AMDILSIDevice(AMDILSubtarget*);
|
||||
virtual ~AMDILSIDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual std::string getDataLayout() const;
|
||||
protected:
|
||||
}; // AMDILSIDevice
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILSIDEVICE_H_
|
|
@ -1,178 +0,0 @@
|
|||
//===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the AMD IL specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDILSubtarget.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ),
|
||||
mDumpCode(false)
|
||||
{
|
||||
memset(CapsOverride, 0, sizeof(*CapsOverride)
|
||||
* AMDILDeviceInfo::MaxNumberCapabilities);
|
||||
// Default card
|
||||
std::string GPU = "rv770";
|
||||
GPU = CPU;
|
||||
mIs64bit = false;
|
||||
mVersion = 0;
|
||||
SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features;
|
||||
SplitString(FS, Features, ",");
|
||||
mDefaultSize[0] = 64;
|
||||
mDefaultSize[1] = 1;
|
||||
mDefaultSize[2] = 1;
|
||||
std::string newFeatures = "";
|
||||
#if defined(_DEBUG) || defined(DEBUG)
|
||||
bool useTest = false;
|
||||
#endif
|
||||
for (size_t x = 0; x < Features.size(); ++x) {
|
||||
if (Features[x].startswith("+mwgs")) {
|
||||
SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes;
|
||||
SplitString(Features[x], sizes, "-");
|
||||
size_t mDim = ::atoi(sizes[1].data());
|
||||
if (mDim > 3) {
|
||||
mDim = 3;
|
||||
}
|
||||
for (size_t y = 0; y < mDim; ++y) {
|
||||
mDefaultSize[y] = ::atoi(sizes[y+2].data());
|
||||
}
|
||||
#if defined(_DEBUG) || defined(DEBUG)
|
||||
} else if (!Features[x].compare("test")) {
|
||||
useTest = true;
|
||||
#endif
|
||||
} else if (Features[x].startswith("+cal")) {
|
||||
SmallVector<StringRef, DEFAULT_VEC_SLOTS> version;
|
||||
SplitString(Features[x], version, "=");
|
||||
mVersion = ::atoi(version[1].data());
|
||||
} else {
|
||||
GPU = CPU;
|
||||
if (x > 0) newFeatures += ',';
|
||||
newFeatures += Features[x];
|
||||
}
|
||||
}
|
||||
// If we don't have a version then set it to
|
||||
// -1 which enables everything. This is for
|
||||
// offline devices.
|
||||
if (!mVersion) {
|
||||
mVersion = (uint32_t)-1;
|
||||
}
|
||||
for (int x = 0; x < 3; ++x) {
|
||||
if (!mDefaultSize[x]) {
|
||||
mDefaultSize[x] = 1;
|
||||
}
|
||||
}
|
||||
#if defined(_DEBUG) || defined(DEBUG)
|
||||
if (useTest) {
|
||||
GPU = "kauai";
|
||||
}
|
||||
#endif
|
||||
ParseSubtargetFeatures(GPU, newFeatures);
|
||||
#if defined(_DEBUG) || defined(DEBUG)
|
||||
if (useTest) {
|
||||
GPU = "test";
|
||||
}
|
||||
#endif
|
||||
mDevName = GPU;
|
||||
mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
|
||||
}
|
||||
AMDILSubtarget::~AMDILSubtarget()
|
||||
{
|
||||
delete mDevice;
|
||||
}
|
||||
bool
|
||||
AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const
|
||||
{
|
||||
assert(caps < AMDILDeviceInfo::MaxNumberCapabilities &&
|
||||
"Caps index is out of bounds!");
|
||||
return CapsOverride[caps];
|
||||
}
|
||||
bool
|
||||
AMDILSubtarget::is64bit() const
|
||||
{
|
||||
return mIs64bit;
|
||||
}
|
||||
bool
|
||||
AMDILSubtarget::isTargetELF() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
size_t
|
||||
AMDILSubtarget::getDefaultSize(uint32_t dim) const
|
||||
{
|
||||
if (dim > 3) {
|
||||
return 1;
|
||||
} else {
|
||||
return mDefaultSize[dim];
|
||||
}
|
||||
}
|
||||
uint32_t
|
||||
AMDILSubtarget::calVersion() const
|
||||
{
|
||||
return mVersion;
|
||||
}
|
||||
|
||||
AMDILGlobalManager*
|
||||
AMDILSubtarget::getGlobalManager() const
|
||||
{
|
||||
return mGM;
|
||||
}
|
||||
void
|
||||
AMDILSubtarget::setGlobalManager(AMDILGlobalManager *gm) const
|
||||
{
|
||||
mGM = gm;
|
||||
}
|
||||
|
||||
AMDILKernelManager*
|
||||
AMDILSubtarget::getKernelManager() const
|
||||
{
|
||||
return mKM;
|
||||
}
|
||||
void
|
||||
AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const
|
||||
{
|
||||
mKM = km;
|
||||
}
|
||||
std::string
|
||||
AMDILSubtarget::getDataLayout() const
|
||||
{
|
||||
if (!mDevice) {
|
||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
|
||||
}
|
||||
return mDevice->getDataLayout();
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDILSubtarget::getDeviceName() const
|
||||
{
|
||||
return mDevName;
|
||||
}
|
||||
const AMDILDevice *
|
||||
AMDILSubtarget::device() const
|
||||
{
|
||||
return mDevice;
|
||||
}
|
|
@ -1,76 +0,0 @@
|
|||
//=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the AMDIL specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _AMDILSUBTARGET_H_
|
||||
#define _AMDILSUBTARGET_H_
|
||||
|
||||
#include "AMDILDevice.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
#define MAX_CB_SIZE (1 << 16)
|
||||
namespace llvm {
|
||||
class Module;
|
||||
class AMDILKernelManager;
|
||||
class AMDILGlobalManager;
|
||||
class AMDILDevice;
|
||||
class AMDILSubtarget : public AMDILGenSubtargetInfo {
|
||||
private:
|
||||
bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities];
|
||||
mutable AMDILGlobalManager *mGM;
|
||||
mutable AMDILKernelManager *mKM;
|
||||
const AMDILDevice *mDevice;
|
||||
size_t mDefaultSize[3];
|
||||
std::string mDevName;
|
||||
uint32_t mVersion;
|
||||
bool mIs64bit;
|
||||
bool mIs32on64bit;
|
||||
bool mDumpCode;
|
||||
public:
|
||||
AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
|
||||
virtual ~AMDILSubtarget();
|
||||
bool isOverride(AMDILDeviceInfo::Caps) const;
|
||||
bool is64bit() const;
|
||||
|
||||
// Helper functions to simplify if statements
|
||||
bool isTargetELF() const;
|
||||
AMDILGlobalManager* getGlobalManager() const;
|
||||
void setGlobalManager(AMDILGlobalManager *gm) const;
|
||||
AMDILKernelManager* getKernelManager() const;
|
||||
void setKernelManager(AMDILKernelManager *gm) const;
|
||||
const AMDILDevice* device() const;
|
||||
std::string getDataLayout() const;
|
||||
std::string getDeviceName() const;
|
||||
virtual size_t getDefaultSize(uint32_t dim) const;
|
||||
// Return the version of CAL that the backend should target.
|
||||
uint32_t calVersion() const;
|
||||
// ParseSubtargetFeatures - Parses features string setting specified
|
||||
// subtarget options. Definition of function is
|
||||
//auto generated by tblgen.
|
||||
void
|
||||
ParseSubtargetFeatures(
|
||||
llvm::StringRef CPU,
|
||||
llvm::StringRef FS);
|
||||
bool dumpCode() const { return mDumpCode; }
|
||||
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // AMDILSUBTARGET_H_
|
|
@ -1,120 +0,0 @@
|
|||
//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
include "AMDILEnumeratedTypes.td"
|
||||
|
||||
// Each token is 32 bits as specified in section 2.1 of the IL spec
|
||||
class ILToken <bits<32> n> {
|
||||
field bits<32> _bits = n;
|
||||
}
|
||||
|
||||
// Section 2.2.1 - IL Language Token
|
||||
class ILLang<bits<8> client_type> : ILToken<0> {
|
||||
let _bits{0-7} = client_type;
|
||||
}
|
||||
|
||||
// Section 2.2.2 - IL Version Token
|
||||
class ILVersion<bits<8> minor_version, bits<8> major_version, ILShader shader_type> : ILToken<0> {
|
||||
let _bits{0-7} = minor_version;
|
||||
let _bits{8-15} = major_version;
|
||||
let _bits{16-23} = shader_type.Value;
|
||||
}
|
||||
|
||||
// Section 2.2.3 - IL Opcode Token
|
||||
class ILOpcode<ILOpCode opcode, bits<14> control, bit sec_mod_pre, bit pri_mod_pre> : ILToken<0> {
|
||||
let _bits{0-15} = opcode.Value;
|
||||
let _bits{16-29} = control;
|
||||
let _bits{30} = sec_mod_pre;
|
||||
let _bits{31} = pri_mod_pre;
|
||||
}
|
||||
|
||||
// Section 2.2.4 - IL Destination Token
|
||||
class ILDst<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
|
||||
let _bits{0-15} = register_num.Value;
|
||||
let _bits{16-21} = register_type.Value;
|
||||
let _bits{22} = mod_pre;
|
||||
let _bits{23-24} = relative_address;
|
||||
let _bits{25} = dimension;
|
||||
let _bits{26} = immediate_pre;
|
||||
let _bits{31} = extended;
|
||||
}
|
||||
|
||||
// Section 2.2.5 - IL Destination Modifier Token
|
||||
class ILDstMod<ILModDstComp x, ILModDstComp y, ILModDstComp z, ILModDstComp w, bit clamp, ILShiftScale shift_scale> : ILToken<0> {
|
||||
let _bits{0-1} = x.Value;
|
||||
let _bits{2-3} = y.Value;
|
||||
let _bits{4-5} = z.Value;
|
||||
let _bits{6-7} = w.Value;
|
||||
let _bits{8} = clamp;
|
||||
//let _bits{9-12} = shift_scale;
|
||||
}
|
||||
|
||||
// Section 2.2.6 - IL Source Token
|
||||
class ILSrc<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
|
||||
let _bits{0-15} = register_num.Value;
|
||||
let _bits{16-21} = register_type.Value;
|
||||
let _bits{22} = mod_pre;
|
||||
let _bits{23-24} = relative_address;
|
||||
let _bits{25} = dimension;
|
||||
let _bits{26} = immediate_pre;
|
||||
let _bits{31} = extended;
|
||||
}
|
||||
|
||||
// Section 2.2.7 - IL Source Modifier Token
|
||||
class ILSrcMod<ILComponentSelect swizzle_x, bit negate_x, ILComponentSelect swizzle_y, bit negate_y,
|
||||
ILComponentSelect swizzle_z, bit negate_z, ILComponentSelect swizzle_w, bit negate_w,
|
||||
bit invert, bit bias, bit x2, bit sign, bit abs, ILDivComp divComp,
|
||||
bits<8> clamp> : ILToken<0> {
|
||||
let _bits{0-2} = swizzle_x.Value;
|
||||
let _bits{3} = negate_x;
|
||||
let _bits{4-6} = swizzle_y.Value;
|
||||
let _bits{7} = negate_y;
|
||||
let _bits{8-10} = swizzle_z.Value;
|
||||
let _bits{11} = negate_z;
|
||||
let _bits{12-14} = swizzle_w.Value;
|
||||
let _bits{15} = negate_w;
|
||||
let _bits{16} = invert;
|
||||
let _bits{17} = bias;
|
||||
let _bits{18} = x2;
|
||||
let _bits{19} = sign;
|
||||
let _bits{20} = abs;
|
||||
let _bits{21-23} = divComp.Value;
|
||||
let _bits{24-31} = clamp;
|
||||
}
|
||||
|
||||
// Section 2.2.8 - IL Relative Address Token
|
||||
class ILRelAddr<AMDILReg address_register, bit loop_relative, ILAddressing component> : ILToken<0> {
|
||||
let _bits{0-15} = address_register.Value;
|
||||
let _bits{16} = loop_relative;
|
||||
let _bits{17-19} = component.Value;
|
||||
}
|
||||
|
||||
// IL Literal Token
|
||||
class ILLiteral<bits<32> val> : ILToken<0> {
|
||||
let _bits = val;
|
||||
}
|
||||
|
||||
// All tokens required for a destination register
|
||||
class ILDstReg<ILDst Reg, ILDstMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
|
||||
ILDst reg = Reg;
|
||||
ILDstMod mod = Mod;
|
||||
ILRelAddr rel = Rel;
|
||||
ILSrc reg_rel = Reg_Rel;
|
||||
ILSrcMod reg_rel_mod = Reg_Rel_Mod;
|
||||
}
|
||||
|
||||
// All tokens required for a source register
|
||||
class ILSrcReg<ILSrc Reg, ILSrcMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
|
||||
ILSrc reg = Reg;
|
||||
ILSrcMod mod = Mod;
|
||||
ILRelAddr rel = Rel;
|
||||
ILSrc reg_rel = Reg_Rel;
|
||||
ILSrcMod reg_rel_mod = Reg_Rel_Mod;
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides helper macros for expanding case statements.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef AMDILUTILITYFUNCTIONS_H_
|
||||
#define AMDILUTILITYFUNCTIONS_H_
|
||||
|
||||
// Macros that are used to help with switch statements for various data types
|
||||
// However, these macro's do not return anything unlike the second set below.
|
||||
#define ExpandCaseTo32bitIntTypes(Instr) \
|
||||
case Instr##_i32:
|
||||
|
||||
#define ExpandCaseTo32bitIntTruncTypes(Instr) \
|
||||
case Instr##_i32i8: \
|
||||
case Instr##_i32i16:
|
||||
|
||||
#define ExpandCaseToIntTypes(Instr) \
|
||||
ExpandCaseTo32bitIntTypes(Instr)
|
||||
|
||||
#define ExpandCaseToIntTruncTypes(Instr) \
|
||||
ExpandCaseTo32bitIntTruncTypes(Instr)
|
||||
|
||||
#define ExpandCaseToFloatTypes(Instr) \
|
||||
case Instr##_f32:
|
||||
|
||||
#define ExpandCaseTo32bitScalarTypes(Instr) \
|
||||
ExpandCaseTo32bitIntTypes(Instr) \
|
||||
case Instr##_f32:
|
||||
|
||||
#define ExpandCaseToAllScalarTypes(Instr) \
|
||||
ExpandCaseToFloatTypes(Instr) \
|
||||
ExpandCaseToIntTypes(Instr)
|
||||
|
||||
#define ExpandCaseToAllScalarTruncTypes(Instr) \
|
||||
ExpandCaseToFloatTruncTypes(Instr) \
|
||||
ExpandCaseToIntTruncTypes(Instr)
|
||||
|
||||
#define ExpandCaseToAllTypes(Instr) \
|
||||
ExpandCaseToAllScalarTypes(Instr)
|
||||
|
||||
#define ExpandCaseToAllTruncTypes(Instr) \
|
||||
ExpandCaseToAllScalarTruncTypes(Instr)
|
||||
|
||||
// Macros that expand into statements with return values
|
||||
#define ExpandCaseTo32bitIntReturn(Instr, Return) \
|
||||
case Instr##_i32: return Return##_i32;
|
||||
|
||||
#define ExpandCaseToIntReturn(Instr, Return) \
|
||||
ExpandCaseTo32bitIntReturn(Instr, Return)
|
||||
|
||||
#define ExpandCaseToFloatReturn(Instr, Return) \
|
||||
case Instr##_f32: return Return##_f32;\
|
||||
|
||||
#define ExpandCaseToAllScalarReturn(Instr, Return) \
|
||||
ExpandCaseToFloatReturn(Instr, Return) \
|
||||
ExpandCaseToIntReturn(Instr, Return)
|
||||
|
||||
// These macros expand to common groupings of RegClass ID's
|
||||
#define ExpandCaseTo1CompRegID \
|
||||
case AMDGPU::GPRI32RegClassID: \
|
||||
case AMDGPU::GPRF32RegClassID:
|
||||
|
||||
#define ExpandCaseTo32BitType(Instr) \
|
||||
case Instr##_i32: \
|
||||
case Instr##_f32:
|
||||
|
||||
#endif // AMDILUTILITYFUNCTIONS_H_
|
|
@ -1,58 +0,0 @@
|
|||
//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Intrinsic operation support
|
||||
//===--------------------------------------------------------------------===//
|
||||
let TargetPrefix = "AMDIL", isTarget = 1 in {
|
||||
def int_AMDIL_barrier : GCCBuiltin<"barrier">,
|
||||
BinaryIntNoRetInt;
|
||||
def int_AMDIL_barrier_global : GCCBuiltin<"barrierGlobal">,
|
||||
BinaryIntNoRetInt;
|
||||
def int_AMDIL_barrier_local : GCCBuiltin<"barrierLocal">,
|
||||
BinaryIntNoRetInt;
|
||||
def int_AMDIL_barrier_region : GCCBuiltin<"barrierRegion">,
|
||||
BinaryIntNoRetInt;
|
||||
def int_AMDIL_get_region_id : GCCBuiltin<"__amdil_get_region_id_int">,
|
||||
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||
def int_AMDIL_get_region_local_id : GCCBuiltin<"__amdil_get_region_local_id_int">,
|
||||
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||
def int_AMDIL_get_num_regions : GCCBuiltin<"__amdil_get_num_regions_int">,
|
||||
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||
def int_AMDIL_get_region_size : GCCBuiltin<"__amdil_get_region_size_int">,
|
||||
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||
}
|
||||
|
||||
let isCall=1, isNotDuplicable=1 in {
|
||||
let Predicates=[hasRegionAS] in {
|
||||
def BARRIER_EGNI : BinaryOpNoRet<IL_OP_BARRIER, (outs),
|
||||
(ins GPRI32:$flag, GPRI32:$id),
|
||||
"fence_threads_memory_lds_gds_gws",
|
||||
[(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
|
||||
}
|
||||
let Predicates=[noRegionAS] in {
|
||||
def BARRIER_7XX : BinaryOpNoRet<IL_OP_BARRIER, (outs),
|
||||
(ins GPRI32:$flag, GPRI32:$id),
|
||||
"fence_threads_memory_lds",
|
||||
[(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
|
||||
}
|
||||
|
||||
def BARRIER_LOCAL : BinaryOpNoRet<IL_OP_BARRIER_LOCAL, (outs),
|
||||
(ins GPRI32:$flag, GPRI32:$id),
|
||||
"fence_threads_lds",
|
||||
[(int_AMDIL_barrier_local GPRI32:$flag, GPRI32:$id)]>;
|
||||
|
||||
def BARRIER_GLOBAL : BinaryOpNoRet<IL_OP_BARRIER_GLOBAL, (outs),
|
||||
(ins GPRI32:$flag, GPRI32:$id),
|
||||
"fence_threads_memory",
|
||||
[(int_AMDIL_barrier_global GPRI32:$flag, GPRI32:$id)]>;
|
||||
|
||||
def BARRIER_REGION : BinaryOpNoRet<IL_OP_BARRIER_REGION, (outs),
|
||||
(ins GPRI32:$flag, GPRI32:$id),
|
||||
"fence_threads_gds",
|
||||
[(int_AMDIL_barrier_region GPRI32:$flag, GPRI32:$id)]>;
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
|
||||
|
||||
tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
|
||||
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
|
||||
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
|
||||
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
|
||||
tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
|
||||
tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
|
||||
tablegen(LLVM AMDGPUGenCodeEmitter.inc -gen-emitter)
|
||||
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
|
||||
add_public_tablegen_target(AMDGPUCommonTableGen)
|
||||
|
||||
add_llvm_target(AMDGPUCodeGen
|
||||
AMDIL7XXDevice.cpp
|
||||
AMDILCFGStructurizer.cpp
|
||||
AMDILDevice.cpp
|
||||
AMDILDeviceInfo.cpp
|
||||
AMDILEvergreenDevice.cpp
|
||||
AMDILFrameLowering.cpp
|
||||
AMDILInstrInfo.cpp
|
||||
AMDILIntrinsicInfo.cpp
|
||||
AMDILISelDAGToDAG.cpp
|
||||
AMDILISelLowering.cpp
|
||||
AMDILNIDevice.cpp
|
||||
AMDILPeepholeOptimizer.cpp
|
||||
AMDILRegisterInfo.cpp
|
||||
AMDILSIDevice.cpp
|
||||
AMDILSubtarget.cpp
|
||||
AMDGPUTargetMachine.cpp
|
||||
AMDGPUISelLowering.cpp
|
||||
AMDGPUConvertToISA.cpp
|
||||
AMDGPUInstrInfo.cpp
|
||||
AMDGPURegisterInfo.cpp
|
||||
AMDGPUUtil.cpp
|
||||
R600CodeEmitter.cpp
|
||||
R600InstrInfo.cpp
|
||||
R600ISelLowering.cpp
|
||||
R600KernelParameters.cpp
|
||||
R600MachineFunctionInfo.cpp
|
||||
R600RegisterInfo.cpp
|
||||
SIAssignInterpRegs.cpp
|
||||
SICodeEmitter.cpp
|
||||
SIInstrInfo.cpp
|
||||
SIISelLowering.cpp
|
||||
SIMachineFunctionInfo.cpp
|
||||
SIRegisterInfo.cpp
|
||||
)
|
||||
|
||||
add_subdirectory(TargetInfo)
|
||||
add_subdirectory(MCTargetDesc)
|
|
@ -1,13 +0,0 @@
|
|||
There are 3 files used by this backend that are generated by perl scripts:
|
||||
|
||||
- R600RegisterInfo.td
|
||||
+ Generated with:
|
||||
perl R600GenRegisterInfo.pl > R600RegisterInfo.td
|
||||
|
||||
- R600HwRegInfo.include
|
||||
+ Generated with:
|
||||
perl R600GenRegisterInfo.pl
|
||||
|
||||
- SIRegisterInfo.td
|
||||
+ Generated with:
|
||||
perl SIGenRegisterInfo.pl > SIRegisterInfo.td
|
|
@ -1,32 +0,0 @@
|
|||
;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[common]
|
||||
subdirectories = MCTargetDesc TargetInfo
|
||||
|
||||
[component_0]
|
||||
type = TargetGroup
|
||||
name = AMDGPU
|
||||
parent = Target
|
||||
has_asmprinter = 0
|
||||
|
||||
[component_1]
|
||||
type = Library
|
||||
name = AMDGPUCodeGen
|
||||
parent = AMDGPU
|
||||
required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDGPUInfo AMDGPUDesc
|
||||
add_to_library_groups = AMDGPU
|
|
@ -1,104 +0,0 @@
|
|||
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: Add full description
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUMCAsmInfo.h"
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
using namespace llvm;
|
||||
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
|
||||
{
|
||||
//===------------------------------------------------------------------===//
|
||||
HasSubsectionsViaSymbols = true;
|
||||
HasMachoZeroFillDirective = false;
|
||||
HasMachoTBSSDirective = false;
|
||||
HasStaticCtorDtorReferenceInStaticMode = false;
|
||||
LinkerRequiresNonEmptyDwarfLines = true;
|
||||
MaxInstLength = 16;
|
||||
PCSymbol = "$";
|
||||
SeparatorString = "\n";
|
||||
CommentColumn = 40;
|
||||
CommentString = ";";
|
||||
LabelSuffix = ":";
|
||||
GlobalPrefix = "@";
|
||||
PrivateGlobalPrefix = ";.";
|
||||
LinkerPrivateGlobalPrefix = "!";
|
||||
InlineAsmStart = ";#ASMSTART";
|
||||
InlineAsmEnd = ";#ASMEND";
|
||||
AssemblerDialect = 0;
|
||||
AllowQuotesInName = false;
|
||||
AllowNameToStartWithDigit = false;
|
||||
AllowPeriodsInName = false;
|
||||
|
||||
//===--- Data Emission Directives -------------------------------------===//
|
||||
ZeroDirective = ".zero";
|
||||
AsciiDirective = ".ascii\t";
|
||||
AscizDirective = ".asciz\t";
|
||||
Data8bitsDirective = ".byte\t";
|
||||
Data16bitsDirective = ".short\t";
|
||||
Data32bitsDirective = ".long\t";
|
||||
Data64bitsDirective = ".quad\t";
|
||||
GPRel32Directive = NULL;
|
||||
SunStyleELFSectionSwitchSyntax = true;
|
||||
UsesELFSectionDirectiveForBSS = true;
|
||||
HasMicrosoftFastStdCallMangling = false;
|
||||
|
||||
//===--- Alignment Information ----------------------------------------===//
|
||||
AlignDirective = ".align\t";
|
||||
AlignmentIsInBytes = true;
|
||||
TextAlignFillValue = 0;
|
||||
|
||||
//===--- Global Variable Emission Directives --------------------------===//
|
||||
GlobalDirective = ".global";
|
||||
ExternDirective = ".extern";
|
||||
HasSetDirective = false;
|
||||
HasAggressiveSymbolFolding = true;
|
||||
LCOMMDirectiveType = LCOMM::None;
|
||||
COMMDirectiveAlignmentIsInBytes = false;
|
||||
HasDotTypeDotSizeDirective = false;
|
||||
HasSingleParameterDotFile = true;
|
||||
HasNoDeadStrip = true;
|
||||
HasSymbolResolver = false;
|
||||
WeakRefDirective = ".weakref\t";
|
||||
WeakDefDirective = ".weakdef\t";
|
||||
LinkOnceDirective = NULL;
|
||||
HiddenVisibilityAttr = MCSA_Hidden;
|
||||
HiddenDeclarationVisibilityAttr = MCSA_Hidden;
|
||||
ProtectedVisibilityAttr = MCSA_Protected;
|
||||
|
||||
//===--- Dwarf Emission Directives -----------------------------------===//
|
||||
HasLEB128 = true;
|
||||
SupportsDebugInformation = true;
|
||||
ExceptionsType = ExceptionHandling::None;
|
||||
DwarfUsesInlineInfoSection = false;
|
||||
DwarfSectionOffsetDirective = ".offset";
|
||||
|
||||
}
|
||||
const char*
|
||||
AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
|
||||
{
|
||||
switch (AS) {
|
||||
default:
|
||||
return NULL;
|
||||
case 0:
|
||||
return NULL;
|
||||
};
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const MCSection*
|
||||
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
|
||||
{
|
||||
return NULL;
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: Add full description
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUMCASMINFO_H_
|
||||
#define AMDGPUMCASMINFO_H_
|
||||
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
namespace llvm {
|
||||
class Target;
|
||||
class StringRef;
|
||||
|
||||
class AMDGPUMCAsmInfo : public MCAsmInfo {
|
||||
public:
|
||||
explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
|
||||
const char*
|
||||
getDataASDirective(unsigned int Size, unsigned int AS) const;
|
||||
const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
|
||||
};
|
||||
} // namespace llvm
|
||||
#endif // AMDGPUMCASMINFO_H_
|
|
@ -1,61 +0,0 @@
|
|||
#include "AMDGPUMCTargetDesc.h"
|
||||
#include "AMDGPUMCAsmInfo.h"
|
||||
#include "llvm/MC/MachineLocation.h"
|
||||
#include "llvm/MC/MCCodeGenInfo.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define GET_INSTRINFO_MC_DESC
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_MC_DESC
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
#define GET_REGINFO_MC_DESC
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static MCInstrInfo *createAMDGPUMCInstrInfo() {
|
||||
MCInstrInfo *X = new MCInstrInfo();
|
||||
InitAMDILMCInstrInfo(X);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
|
||||
MCRegisterInfo *X = new MCRegisterInfo();
|
||||
InitAMDILMCRegisterInfo(X, 0);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
|
||||
StringRef FS) {
|
||||
MCSubtargetInfo * X = new MCSubtargetInfo();
|
||||
InitAMDILMCSubtargetInfo(X, TT, CPU, FS);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
|
||||
CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL) {
|
||||
MCCodeGenInfo *X = new MCCodeGenInfo();
|
||||
X->InitMCCodeGenInfo(RM, CM, OL);
|
||||
return X;
|
||||
}
|
||||
|
||||
extern "C" void LLVMInitializeAMDGPUTargetMC() {
|
||||
|
||||
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
|
||||
|
||||
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
|
||||
|
||||
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
|
||||
|
||||
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
|
||||
|
||||
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
|
||||
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides AMDGPU specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
#ifndef AMDGPUMCTARGETDESC_H
|
||||
#define AMDGPUMCTARGETDESC_H
|
||||
|
||||
namespace llvm {
|
||||
class MCSubtargetInfo;
|
||||
class Target;
|
||||
|
||||
extern Target TheAMDGPUTarget;
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
#endif // AMDGPUMCTARGETDESC_H
|
|
@ -1,7 +0,0 @@
|
|||
|
||||
add_llvm_library(LLVMAMDGPUDesc
|
||||
AMDGPUMCTargetDesc.cpp
|
||||
AMDGPUMCAsmInfo.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMAMDGPUDesc AMDGPUCommonTableGen)
|
|
@ -1,23 +0,0 @@
|
|||
;===- ./lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = AMDGPUDesc
|
||||
parent = AMDGPU
|
||||
required_libraries = AMDGPUInfo MC
|
||||
add_to_library_groups = AMDGPU
|
|
@ -1,16 +0,0 @@
|
|||
##===- lib/Target/AMDGPU/TargetDesc/Makefile ----------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMAMDGPUDesc
|
||||
|
||||
# Hack: we need to include 'main' target directory to grab private headers
|
||||
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
|
@ -1,22 +0,0 @@
|
|||
##===- lib/Target/AMDGPU/Makefile ---------------------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
LEVEL = ../../..
|
||||
LIBRARYNAME = LLVMAMDGPUCodeGen
|
||||
TARGET = AMDGPU
|
||||
|
||||
# Make sure that tblgen is run, first thing.
|
||||
BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
|
||||
AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
|
||||
AMDGPUGenCodeEmitter.inc AMDGPUGenCallingConv.inc \
|
||||
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
|
||||
|
||||
DIRS = TargetInfo MCTargetDesc
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
|
@ -1,27 +0,0 @@
|
|||
//===-- Processors.td - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AMDIL processors supported.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
|
||||
: Processor<Name, itin, Features>;
|
||||
def : Proc<"rv710", R600_EG_Itin, []>;
|
||||
def : Proc<"rv730", R600_EG_Itin, []>;
|
||||
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
|
||||
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||
def : Proc<"SI", SI_Itin, []>;
|
|
@ -1,614 +0,0 @@
|
|||
//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This code emitters outputs bytecode that is understood by the r600g driver
|
||||
// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
|
||||
// except that the size of the instruction fields are rounded up to the
|
||||
// nearest byte.
|
||||
//
|
||||
// [1] http://www.mesa3d.org/
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "AMDILCodeEmitter.h"
|
||||
#include "AMDILInstrInfo.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/DataTypes.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define SRC_BYTE_COUNT 11
|
||||
#define DST_BYTE_COUNT 5
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
|
||||
|
||||
private:
|
||||
|
||||
static char ID;
|
||||
formatted_raw_ostream &_OS;
|
||||
const TargetMachine * TM;
|
||||
const MachineRegisterInfo * MRI;
|
||||
const R600RegisterInfo * TRI;
|
||||
|
||||
bool IsCube;
|
||||
bool IsReduction;
|
||||
bool IsVector;
|
||||
unsigned currentElement;
|
||||
bool IsLast;
|
||||
|
||||
public:
|
||||
|
||||
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
|
||||
_OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false),
|
||||
IsLast(true) { }
|
||||
|
||||
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF);
|
||||
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
||||
const MachineOperand &MO) const;
|
||||
|
||||
private:
|
||||
|
||||
void EmitALUInstr(MachineInstr &MI);
|
||||
void EmitSrc(const MachineOperand & MO, int chan_override = -1);
|
||||
void EmitDst(const MachineOperand & MO);
|
||||
void EmitALU(MachineInstr &MI, unsigned numSrc);
|
||||
void EmitTexInstr(MachineInstr &MI);
|
||||
void EmitFCInstr(MachineInstr &MI);
|
||||
|
||||
void EmitNullBytes(unsigned int byteCount);
|
||||
|
||||
void EmitByte(unsigned int byte);
|
||||
|
||||
void EmitTwoBytes(uint32_t bytes);
|
||||
|
||||
void Emit(uint32_t value);
|
||||
void Emit(uint64_t value);
|
||||
|
||||
unsigned getHWReg(unsigned regNo) const;
|
||||
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
enum RegElement {
|
||||
ELEMENT_X = 0,
|
||||
ELEMENT_Y,
|
||||
ELEMENT_Z,
|
||||
ELEMENT_W
|
||||
};
|
||||
|
||||
enum InstrTypes {
|
||||
INSTR_ALU = 0,
|
||||
INSTR_TEX,
|
||||
INSTR_FC,
|
||||
INSTR_NATIVE,
|
||||
INSTR_VTX
|
||||
};
|
||||
|
||||
enum FCInstr {
|
||||
FC_IF = 0,
|
||||
FC_ELSE,
|
||||
FC_ENDIF,
|
||||
FC_BGNLOOP,
|
||||
FC_ENDLOOP,
|
||||
FC_BREAK,
|
||||
FC_BREAK_NZ_INT,
|
||||
FC_CONTINUE,
|
||||
FC_BREAK_Z_INT
|
||||
};
|
||||
|
||||
enum TextureTypes {
|
||||
TEXTURE_1D = 1,
|
||||
TEXTURE_2D,
|
||||
TEXTURE_3D,
|
||||
TEXTURE_CUBE,
|
||||
TEXTURE_RECT,
|
||||
TEXTURE_SHADOW1D,
|
||||
TEXTURE_SHADOW2D,
|
||||
TEXTURE_SHADOWRECT,
|
||||
TEXTURE_1D_ARRAY,
|
||||
TEXTURE_2D_ARRAY,
|
||||
TEXTURE_SHADOW1D_ARRAY,
|
||||
TEXTURE_SHADOW2D_ARRAY
|
||||
};
|
||||
|
||||
char R600CodeEmitter::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
|
||||
return new R600CodeEmitter(OS);
|
||||
}
|
||||
|
||||
bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
TM = &MF.getTarget();
|
||||
MRI = &MF.getRegInfo();
|
||||
TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
|
||||
const R600InstrInfo * TII = static_cast<const R600InstrInfo *>(TM->getInstrInfo());
|
||||
const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
|
||||
std::string gpu = STM.getDeviceName();
|
||||
|
||||
if (STM.dumpCode()) {
|
||||
MF.dump();
|
||||
}
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
IsReduction = AMDGPU::isReductionOp(MI.getOpcode());
|
||||
IsVector = TII->isVector(MI);
|
||||
IsCube = AMDGPU::isCubeOp(MI.getOpcode());
|
||||
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
|
||||
continue;
|
||||
}
|
||||
if (AMDGPU::isTexOp(MI.getOpcode())) {
|
||||
EmitTexInstr(MI);
|
||||
} else if (AMDGPU::isFCOp(MI.getOpcode())){
|
||||
EmitFCInstr(MI);
|
||||
} else if (IsReduction || IsVector || IsCube) {
|
||||
IsLast = false;
|
||||
// XXX: On Cayman, some (all?) of the vector instructions only need
|
||||
// to fill the first three slots.
|
||||
for (currentElement = 0; currentElement < 4; currentElement++) {
|
||||
IsLast = (currentElement == 3);
|
||||
EmitALUInstr(MI);
|
||||
}
|
||||
IsReduction = false;
|
||||
IsVector = false;
|
||||
IsCube = false;
|
||||
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
||||
MI.getOpcode() == AMDGPU::BUNDLE ||
|
||||
MI.getOpcode() == AMDGPU::KILL) {
|
||||
continue;
|
||||
} else {
|
||||
switch(MI.getOpcode()) {
|
||||
case AMDGPU::RAT_WRITE_CACHELESS_eg:
|
||||
{
|
||||
uint64_t inst = getBinaryCodeForInstr(MI);
|
||||
// Set End Of Program bit
|
||||
// XXX: Need better check of end of program. EOP should be
|
||||
// encoded in one of the operands of the MI, and it should be
|
||||
// set in a prior pass.
|
||||
MachineBasicBlock::iterator NextI = llvm::next(I);
|
||||
MachineInstr &NextMI = *NextI;
|
||||
if (NextMI.getOpcode() == AMDGPU::RETURN) {
|
||||
inst |= (((uint64_t)1) << 53);
|
||||
}
|
||||
EmitByte(INSTR_NATIVE);
|
||||
Emit(inst);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::VTX_READ_PARAM_i32_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_f32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_i32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_f32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
|
||||
{
|
||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI);
|
||||
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
||||
|
||||
EmitByte(INSTR_VTX);
|
||||
Emit(InstWord01);
|
||||
Emit(InstWord2);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
EmitALUInstr(MI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
|
||||
{
|
||||
|
||||
unsigned numOperands = MI.getNumExplicitOperands();
|
||||
|
||||
// Some instructions are just place holder instructions that represent
|
||||
// operations that the GPU does automatically. They should be ignored.
|
||||
if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
|
||||
return;
|
||||
}
|
||||
|
||||
// XXX Check if instruction writes a result
|
||||
if (numOperands < 1) {
|
||||
return;
|
||||
}
|
||||
const MachineOperand dstOp = MI.getOperand(0);
|
||||
|
||||
// Emit instruction type
|
||||
EmitByte(0);
|
||||
|
||||
if (IsCube) {
|
||||
static const int cube_src_swz[] = {2, 2, 0, 1};
|
||||
EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
|
||||
EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
|
||||
EmitNullBytes(SRC_BYTE_COUNT);
|
||||
} else {
|
||||
unsigned int opIndex;
|
||||
for (opIndex = 1; opIndex < numOperands; opIndex++) {
|
||||
// Literal constants are always stored as the last operand.
|
||||
if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
|
||||
break;
|
||||
}
|
||||
EmitSrc(MI.getOperand(opIndex));
|
||||
}
|
||||
|
||||
// Emit zeros for unused sources
|
||||
for ( ; opIndex < 4; opIndex++) {
|
||||
EmitNullBytes(SRC_BYTE_COUNT);
|
||||
}
|
||||
}
|
||||
|
||||
EmitDst(dstOp);
|
||||
|
||||
EmitALU(MI, numOperands - 1);
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
|
||||
{
|
||||
uint32_t value = 0;
|
||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
||||
// For other potential instruction operands, (e.g. constant registers) the
|
||||
// value of the source select is defined in the r600isa docs.
|
||||
if (MO.isReg()) {
|
||||
unsigned reg = MO.getReg();
|
||||
EmitTwoBytes(getHWReg(reg));
|
||||
if (reg == AMDGPU::ALU_LITERAL_X) {
|
||||
const MachineInstr * parent = MO.getParent();
|
||||
unsigned immOpIndex = parent->getNumExplicitOperands() - 1;
|
||||
MachineOperand immOp = parent->getOperand(immOpIndex);
|
||||
if (immOp.isFPImm()) {
|
||||
value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
|
||||
} else {
|
||||
assert(immOp.isImm());
|
||||
value = immOp.getImm();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// XXX: Handle other operand types.
|
||||
EmitTwoBytes(0);
|
||||
}
|
||||
|
||||
// Emit the source channel (1 byte)
|
||||
if (chan_override != -1) {
|
||||
EmitByte(chan_override);
|
||||
} else if (IsReduction) {
|
||||
EmitByte(currentElement);
|
||||
} else if (MO.isReg()) {
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
} else {
|
||||
EmitByte(0);
|
||||
}
|
||||
|
||||
// XXX: Emit isNegated (1 byte)
|
||||
if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
|
||||
&& (MO.getTargetFlags() & MO_FLAG_NEG ||
|
||||
(MO.isReg() &&
|
||||
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
|
||||
EmitByte(1);
|
||||
} else {
|
||||
EmitByte(0);
|
||||
}
|
||||
|
||||
// Emit isAbsolute (1 byte)
|
||||
if (MO.getTargetFlags() & MO_FLAG_ABS) {
|
||||
EmitByte(1);
|
||||
} else {
|
||||
EmitByte(0);
|
||||
}
|
||||
|
||||
// XXX: Emit relative addressing mode (1 byte)
|
||||
EmitByte(0);
|
||||
|
||||
// Emit kc_bank, This will be adjusted later by r600_asm
|
||||
EmitByte(0);
|
||||
|
||||
// Emit the literal value, if applicable (4 bytes).
|
||||
Emit(value);
|
||||
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitDst(const MachineOperand & MO)
|
||||
{
|
||||
if (MO.isReg()) {
|
||||
// Emit the destination register index (1 byte)
|
||||
EmitByte(getHWReg(MO.getReg()));
|
||||
|
||||
// Emit the element of the destination register (1 byte)
|
||||
if (IsReduction || IsCube || IsVector) {
|
||||
EmitByte(currentElement);
|
||||
} else {
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
}
|
||||
|
||||
// Emit isClamped (1 byte)
|
||||
if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
|
||||
EmitByte(1);
|
||||
} else {
|
||||
EmitByte(0);
|
||||
}
|
||||
|
||||
// Emit writemask (1 byte).
|
||||
if (((IsReduction || IsVector) &&
|
||||
currentElement != TRI->getHWRegChan(MO.getReg()))
|
||||
|| MO.getTargetFlags() & MO_FLAG_MASK) {
|
||||
EmitByte(0);
|
||||
} else {
|
||||
EmitByte(1);
|
||||
}
|
||||
|
||||
// XXX: Emit relative addressing mode
|
||||
EmitByte(0);
|
||||
} else {
|
||||
// XXX: Handle other operand types. Are there any for destination regs?
|
||||
EmitNullBytes(DST_BYTE_COUNT);
|
||||
}
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
|
||||
{
|
||||
// Emit the instruction (2 bytes)
|
||||
EmitTwoBytes(getBinaryCodeForInstr(MI));
|
||||
|
||||
// Emit IsLast (for this instruction group) (1 byte)
|
||||
if (IsLast) {
|
||||
EmitByte(1);
|
||||
} else {
|
||||
EmitByte(0);
|
||||
}
|
||||
// Emit isOp3 (1 byte)
|
||||
if (numSrc == 3) {
|
||||
EmitByte(1);
|
||||
} else {
|
||||
EmitByte(0);
|
||||
}
|
||||
|
||||
// XXX: Emit predicate (1 byte)
|
||||
EmitByte(0);
|
||||
|
||||
// XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
|
||||
// r600_asm.c sets it.
|
||||
EmitByte(0);
|
||||
|
||||
// XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
|
||||
EmitByte(0);
|
||||
|
||||
// XXX: Emit OMOD (1 byte) Not implemented.
|
||||
EmitByte(0);
|
||||
|
||||
// XXX: Emit index_mode. I think this is for indirect addressing, so we
|
||||
// don't need to worry about it.
|
||||
EmitByte(0);
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitTexInstr(MachineInstr &MI)
|
||||
{
|
||||
|
||||
unsigned opcode = MI.getOpcode();
|
||||
bool hasOffsets = (opcode == AMDGPU::TEX_LD);
|
||||
unsigned op_offset = hasOffsets ? 3 : 0;
|
||||
int64_t sampler = MI.getOperand(op_offset+2).getImm();
|
||||
int64_t textureType = MI.getOperand(op_offset+3).getImm();
|
||||
unsigned srcSelect[4] = {0, 1, 2, 3};
|
||||
|
||||
// Emit instruction type
|
||||
EmitByte(1);
|
||||
|
||||
// Emit instruction
|
||||
EmitByte(getBinaryCodeForInstr(MI));
|
||||
|
||||
// XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
|
||||
EmitByte(sampler + 1 + 1);
|
||||
|
||||
// Emit source register
|
||||
EmitByte(getHWReg(MI.getOperand(1).getReg()));
|
||||
|
||||
// XXX: Emit src isRelativeAddress
|
||||
EmitByte(0);
|
||||
|
||||
// Emit destination register
|
||||
EmitByte(getHWReg(MI.getOperand(0).getReg()));
|
||||
|
||||
// XXX: Emit dst isRealtiveAddress
|
||||
EmitByte(0);
|
||||
|
||||
// XXX: Emit dst select
|
||||
EmitByte(0); // X
|
||||
EmitByte(1); // Y
|
||||
EmitByte(2); // Z
|
||||
EmitByte(3); // W
|
||||
|
||||
// XXX: Emit lod bias
|
||||
EmitByte(0);
|
||||
|
||||
// XXX: Emit coord types
|
||||
unsigned coordType[4] = {1, 1, 1, 1};
|
||||
|
||||
if (textureType == TEXTURE_RECT
|
||||
|| textureType == TEXTURE_SHADOWRECT) {
|
||||
coordType[ELEMENT_X] = 0;
|
||||
coordType[ELEMENT_Y] = 0;
|
||||
}
|
||||
|
||||
if (textureType == TEXTURE_1D_ARRAY
|
||||
|| textureType == TEXTURE_SHADOW1D_ARRAY) {
|
||||
if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
|
||||
coordType[ELEMENT_Y] = 0;
|
||||
} else {
|
||||
coordType[ELEMENT_Z] = 0;
|
||||
srcSelect[ELEMENT_Z] = ELEMENT_Y;
|
||||
}
|
||||
} else if (textureType == TEXTURE_2D_ARRAY
|
||||
|| textureType == TEXTURE_SHADOW2D_ARRAY) {
|
||||
coordType[ELEMENT_Z] = 0;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
EmitByte(coordType[i]);
|
||||
}
|
||||
|
||||
// XXX: Emit offsets
|
||||
if (hasOffsets)
|
||||
for (unsigned i = 2; i < 5; i++)
|
||||
EmitByte(MI.getOperand(i).getImm()<<1);
|
||||
else
|
||||
EmitNullBytes(3);
|
||||
|
||||
// Emit sampler id
|
||||
EmitByte(sampler);
|
||||
|
||||
// XXX:Emit source select
|
||||
if ((textureType == TEXTURE_SHADOW1D
|
||||
|| textureType == TEXTURE_SHADOW2D
|
||||
|| textureType == TEXTURE_SHADOWRECT
|
||||
|| textureType == TEXTURE_SHADOW1D_ARRAY)
|
||||
&& opcode != AMDGPU::TEX_SAMPLE_C_L
|
||||
&& opcode != AMDGPU::TEX_SAMPLE_C_LB) {
|
||||
srcSelect[ELEMENT_W] = ELEMENT_Z;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
EmitByte(srcSelect[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
|
||||
{
|
||||
// Emit instruction type
|
||||
EmitByte(INSTR_FC);
|
||||
|
||||
// Emit SRC
|
||||
unsigned numOperands = MI.getNumOperands();
|
||||
if (numOperands > 0) {
|
||||
assert(numOperands == 1);
|
||||
EmitSrc(MI.getOperand(0));
|
||||
} else {
|
||||
EmitNullBytes(SRC_BYTE_COUNT);
|
||||
}
|
||||
|
||||
// Emit FC Instruction
|
||||
enum FCInstr instr;
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::BREAK_LOGICALZ_f32:
|
||||
instr = FC_BREAK;
|
||||
break;
|
||||
case AMDGPU::BREAK_LOGICALNZ_f32:
|
||||
case AMDGPU::BREAK_LOGICALNZ_i32:
|
||||
instr = FC_BREAK_NZ_INT;
|
||||
break;
|
||||
case AMDGPU::BREAK_LOGICALZ_i32:
|
||||
instr = FC_BREAK_Z_INT;
|
||||
break;
|
||||
case AMDGPU::CONTINUE_LOGICALNZ_f32:
|
||||
case AMDGPU::CONTINUE_LOGICALNZ_i32:
|
||||
instr = FC_CONTINUE;
|
||||
break;
|
||||
case AMDGPU::IF_LOGICALNZ_f32:
|
||||
case AMDGPU::IF_LOGICALNZ_i32:
|
||||
instr = FC_IF;
|
||||
break;
|
||||
case AMDGPU::IF_LOGICALZ_f32:
|
||||
abort();
|
||||
break;
|
||||
case AMDGPU::ELSE:
|
||||
instr = FC_ELSE;
|
||||
break;
|
||||
case AMDGPU::ENDIF:
|
||||
instr = FC_ENDIF;
|
||||
break;
|
||||
case AMDGPU::ENDLOOP:
|
||||
instr = FC_ENDLOOP;
|
||||
break;
|
||||
case AMDGPU::WHILELOOP:
|
||||
instr = FC_BGNLOOP;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
EmitByte(instr);
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitNullBytes(unsigned int byteCount)
|
||||
{
|
||||
for (unsigned int i = 0; i < byteCount; i++) {
|
||||
EmitByte(0);
|
||||
}
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitByte(unsigned int byte)
|
||||
{
|
||||
_OS.write((uint8_t) byte & 0xff);
|
||||
}
|
||||
void R600CodeEmitter::EmitTwoBytes(unsigned int bytes)
|
||||
{
|
||||
_OS.write((uint8_t) (bytes & 0xff));
|
||||
_OS.write((uint8_t) ((bytes >> 8) & 0xff));
|
||||
}
|
||||
|
||||
void R600CodeEmitter::Emit(uint32_t value)
|
||||
{
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
_OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
|
||||
}
|
||||
}
|
||||
|
||||
void R600CodeEmitter::Emit(uint64_t value)
|
||||
{
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
EmitByte((value >> (8 * i)) & 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
|
||||
{
|
||||
unsigned HWReg;
|
||||
|
||||
HWReg = TRI->getEncodingValue(regNo);
|
||||
if (AMDGPU::R600_CReg32RegClass.contains(regNo)) {
|
||||
HWReg += 512;
|
||||
}
|
||||
return HWReg;
|
||||
}
|
||||
|
||||
uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
|
||||
const MachineOperand &MO) const
|
||||
{
|
||||
if (MO.isReg()) {
|
||||
return getHWReg(MO.getReg());
|
||||
} else {
|
||||
return MO.getImm();
|
||||
}
|
||||
}
|
||||
|
||||
#include "AMDGPUGenCodeEmitter.inc"
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
#
|
||||
# This perl script prints to stdout .td code to be used as R600RegisterInfo.td
|
||||
# it also generates a file called R600HwRegInfo.include, which contains helper
|
||||
# functions for determining the hw encoding of registers.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use constant CONST_REG_COUNT => 100;
|
||||
use constant TEMP_REG_COUNT => 128;
|
||||
|
||||
my $CREG_MAX = CONST_REG_COUNT - 1;
|
||||
my $TREG_MAX = TEMP_REG_COUNT - 1;
|
||||
|
||||
print <<STRING;
|
||||
|
||||
class R600Reg <string name, bits<16> encoding> : Register<name> {
|
||||
let Namespace = "AMDGPU";
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
|
||||
RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
STRING
|
||||
|
||||
my $i;
|
||||
|
||||
### REG DEFS ###
|
||||
|
||||
my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
|
||||
my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
|
||||
|
||||
my @t128reg;
|
||||
my @treg_x;
|
||||
for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
|
||||
my $name = "T$i\_XYZW";
|
||||
print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W], $i >;\n};
|
||||
$t128reg[$i] = $name;
|
||||
$treg_x[$i] = "T$i\_X";
|
||||
if ($i % 10 == 0) {
|
||||
$t128reg[$i] .= "\n";
|
||||
$treg_x[$i] .= "\n";
|
||||
}
|
||||
}
|
||||
|
||||
my $treg_string = join(",", @treg_list);
|
||||
my $creg_list = join(",", @creg_list);
|
||||
my $t128_string = join(",", @t128reg);
|
||||
my $treg_x_string = join(",", @treg_x);
|
||||
print <<STRING;
|
||||
|
||||
class RegSet <dag s> {
|
||||
dag set = s;
|
||||
}
|
||||
|
||||
def ZERO : R600Reg<"0.0", 248>;
|
||||
def ONE : R600Reg<"1.0", 249>;
|
||||
def NEG_ONE : R600Reg<"-1.0", 249>;
|
||||
def ONE_INT : R600Reg<"1", 250>;
|
||||
def HALF : R600Reg<"0.5", 252>;
|
||||
def NEG_HALF : R600Reg<"-0.5", 252>;
|
||||
def PV_X : R600Reg<"pv.x", 254>;
|
||||
def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
|
||||
|
||||
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
$creg_list)>;
|
||||
|
||||
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
$treg_string)>;
|
||||
|
||||
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
$treg_x_string)>;
|
||||
|
||||
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
R600_TReg32,
|
||||
R600_CReg32,
|
||||
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
|
||||
|
||||
def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
|
||||
$t128_string)>
|
||||
{
|
||||
let CopyCost = -1;
|
||||
}
|
||||
|
||||
STRING
|
||||
|
||||
my %index_map;
|
||||
my %chan_map;
|
||||
|
||||
for ($i = 0; $i <= $#creg_list; $i++) {
|
||||
push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
|
||||
push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
|
||||
}
|
||||
|
||||
for ($i = 0; $i <= $#treg_list; $i++) {
|
||||
push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
|
||||
push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
|
||||
}
|
||||
|
||||
for ($i = 0; $i <= $#t128reg; $i++) {
|
||||
push(@{$index_map{$i}}, $t128reg[$i]);
|
||||
push(@{$chan_map{'X'}}, $t128reg[$i]);
|
||||
}
|
||||
|
||||
open(OUTFILE, ">", "R600HwRegInfo.include");
|
||||
|
||||
print OUTFILE <<STRING;
|
||||
|
||||
unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
|
||||
{
|
||||
switch(reg) {
|
||||
default: assert(!"Unknown register"); return 0;
|
||||
STRING
|
||||
|
||||
foreach my $key (keys(%chan_map)) {
|
||||
foreach my $reg (@{$chan_map{$key}}) {
|
||||
chomp($reg);
|
||||
print OUTFILE " case AMDGPU::$reg:\n";
|
||||
}
|
||||
my $val;
|
||||
if ($key eq 'X') {
|
||||
$val = 0;
|
||||
} elsif ($key eq 'Y') {
|
||||
$val = 1;
|
||||
} elsif ($key eq 'Z') {
|
||||
$val = 2;
|
||||
} elsif ($key eq 'W') {
|
||||
$val = 3;
|
||||
} else {
|
||||
die("Unknown chan value; $key");
|
||||
}
|
||||
print OUTFILE " return $val;\n\n";
|
||||
}
|
||||
|
||||
print OUTFILE " }\n}\n\n";
|
||||
|
||||
sub print_reg_defs {
|
||||
my ($count, $prefix) = @_;
|
||||
|
||||
my @reg_list;
|
||||
|
||||
for ($i = 0; $i < $count; $i++) {
|
||||
my $hw_index = get_hw_index($i);
|
||||
my $chan= get_chan_str($i);
|
||||
my $name = "$prefix$hw_index\_$chan";
|
||||
print qq{def $name : R600Reg <"$prefix$hw_index.$chan", $hw_index>;\n};
|
||||
$reg_list[$i] = $name;
|
||||
if ($i % 10 == 0) {
|
||||
$reg_list[$i] .= "\n";
|
||||
}
|
||||
}
|
||||
return @reg_list;
|
||||
}
|
||||
|
||||
#Helper functions
|
||||
sub get_hw_index {
|
||||
my ($index) = @_;
|
||||
return int($index / 4);
|
||||
}
|
||||
|
||||
sub get_chan_str {
|
||||
my ($index) = @_;
|
||||
my $chan = $index % 4;
|
||||
if ($chan == 0 ) {
|
||||
return 'X';
|
||||
} elsif ($chan == 1) {
|
||||
return 'Y';
|
||||
} elsif ($chan == 2) {
|
||||
return 'Z';
|
||||
} elsif ($chan == 3) {
|
||||
return 'W';
|
||||
} else {
|
||||
die("Unknown chan value: $chan");
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,286 +0,0 @@
|
|||
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
|
||||
// is mostly EmitInstrWithCustomInserter().
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600ISelLowering.h"
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
||||
AMDGPUTargetLowering(TM),
|
||||
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
|
||||
{
|
||||
setOperationAction(ISD::MUL, MVT::i64, Expand);
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
|
||||
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
|
||||
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
|
||||
computeRegisterProperties();
|
||||
|
||||
setOperationAction(ISD::FSUB, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::ROTL, MVT::i32, Custom);
|
||||
|
||||
setSchedulingPreference(Sched::VLIW);
|
||||
}
|
||||
|
||||
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
||||
MachineInstr * MI, MachineBasicBlock * BB) const
|
||||
{
|
||||
MachineFunction * MF = BB->getParent();
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MachineBasicBlock::iterator I = *MI;
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
case AMDGPU::TGID_X:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
|
||||
break;
|
||||
case AMDGPU::TGID_Y:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
|
||||
break;
|
||||
case AMDGPU::TGID_Z:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
|
||||
break;
|
||||
case AMDGPU::TIDIG_X:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
|
||||
break;
|
||||
case AMDGPU::TIDIG_Y:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
|
||||
break;
|
||||
case AMDGPU::TIDIG_Z:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
|
||||
break;
|
||||
case AMDGPU::NGROUPS_X:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 0);
|
||||
break;
|
||||
case AMDGPU::NGROUPS_Y:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 1);
|
||||
break;
|
||||
case AMDGPU::NGROUPS_Z:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 2);
|
||||
break;
|
||||
case AMDGPU::GLOBAL_SIZE_X:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 3);
|
||||
break;
|
||||
case AMDGPU::GLOBAL_SIZE_Y:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 4);
|
||||
break;
|
||||
case AMDGPU::GLOBAL_SIZE_Z:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 5);
|
||||
break;
|
||||
case AMDGPU::LOCAL_SIZE_X:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 6);
|
||||
break;
|
||||
case AMDGPU::LOCAL_SIZE_Y:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 7);
|
||||
break;
|
||||
case AMDGPU::LOCAL_SIZE_Z:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 8);
|
||||
break;
|
||||
|
||||
case AMDGPU::CLAMP_R600:
|
||||
MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1));
|
||||
break;
|
||||
|
||||
case AMDGPU::FABS_R600:
|
||||
MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1));
|
||||
break;
|
||||
|
||||
case AMDGPU::FNEG_R600:
|
||||
MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1));
|
||||
break;
|
||||
|
||||
case AMDGPU::R600_LOAD_CONST:
|
||||
{
|
||||
int64_t RegIndex = MI->getOperand(1).getImm();
|
||||
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addReg(ConstantReg);
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::LOAD_INPUT:
|
||||
{
|
||||
int64_t RegIndex = MI->getOperand(1).getImm();
|
||||
addLiveIn(MI, MF, MRI, TII,
|
||||
AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::MASK_WRITE:
|
||||
{
|
||||
unsigned maskedRegister = MI->getOperand(0).getReg();
|
||||
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
|
||||
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
|
||||
MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
|
||||
def->addTargetFlag(MO_FLAG_MASK);
|
||||
// Return early so the instruction is not erased
|
||||
return BB;
|
||||
}
|
||||
|
||||
case AMDGPU::RAT_WRITE_CACHELESS_eg:
|
||||
{
|
||||
// Convert to DWORD address
|
||||
unsigned NewAddr = MRI.createVirtualRegister(
|
||||
&AMDGPU::R600_TReg32_XRegClass);
|
||||
unsigned ShiftValue = MRI.createVirtualRegister(
|
||||
&AMDGPU::R600_TReg32RegClass);
|
||||
|
||||
// XXX In theory, we should be able to pass ShiftValue directly to
|
||||
// the LSHR_eg instruction as an inline literal, but I tried doing it
|
||||
// this way and it didn't produce the correct results.
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
|
||||
.addReg(AMDGPU::ALU_LITERAL_X)
|
||||
.addImm(2);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addReg(ShiftValue);
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addReg(NewAddr);
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::STORE_OUTPUT:
|
||||
{
|
||||
int64_t OutputIndex = MI->getOperand(1).getImm();
|
||||
unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);
|
||||
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
|
||||
.addOperand(MI->getOperand(0));
|
||||
|
||||
if (!MRI.isLiveOut(OutputReg)) {
|
||||
MRI.addLiveOut(OutputReg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::RESERVE_REG:
|
||||
{
|
||||
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
|
||||
int64_t ReservedIndex = MI->getOperand(0).getImm();
|
||||
unsigned ReservedReg =
|
||||
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
|
||||
MFI->ReservedRegs.push_back(ReservedReg);
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::TXD:
|
||||
{
|
||||
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
|
||||
.addOperand(MI->getOperand(3))
|
||||
.addOperand(MI->getOperand(4))
|
||||
.addOperand(MI->getOperand(5));
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
|
||||
.addOperand(MI->getOperand(2))
|
||||
.addOperand(MI->getOperand(4))
|
||||
.addOperand(MI->getOperand(5));
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(4))
|
||||
.addOperand(MI->getOperand(5))
|
||||
.addReg(t0, RegState::Implicit)
|
||||
.addReg(t1, RegState::Implicit);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::TXD_SHADOW:
|
||||
{
|
||||
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
|
||||
.addOperand(MI->getOperand(3))
|
||||
.addOperand(MI->getOperand(4))
|
||||
.addOperand(MI->getOperand(5));
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
|
||||
.addOperand(MI->getOperand(2))
|
||||
.addOperand(MI->getOperand(4))
|
||||
.addOperand(MI->getOperand(5));
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(4))
|
||||
.addOperand(MI->getOperand(5))
|
||||
.addReg(t0, RegState::Implicit)
|
||||
.addReg(t1, RegState::Implicit);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
MI->eraseFromParent();
|
||||
return BB;
|
||||
}
|
||||
|
||||
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineRegisterInfo & MRI, unsigned dword_offset) const
|
||||
{
|
||||
MachineBasicBlock::iterator I = *MI;
|
||||
unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
|
||||
MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
|
||||
.addReg(AMDGPU::ALU_LITERAL_X)
|
||||
.addImm(dword_offset * 4);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addReg(PtrReg)
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom DAG Lowering Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
switch (Op.getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case ISD::ROTL: return LowerROTL(Op, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
|
||||
Op.getOperand(0),
|
||||
Op.getOperand(0),
|
||||
DAG.getNode(ISD::SUB, DL, VT,
|
||||
DAG.getConstant(32, MVT::i32),
|
||||
Op.getOperand(1)));
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 DAG Lowering interface definition
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600ISELLOWERING_H
|
||||
#define R600ISELLOWERING_H
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class R600InstrInfo;
|
||||
|
||||
class R600TargetLowering : public AMDGPUTargetLowering
|
||||
{
|
||||
public:
|
||||
R600TargetLowering(TargetMachine &TM);
|
||||
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
MachineBasicBlock * BB) const;
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
private:
|
||||
const R600InstrInfo * TII;
|
||||
|
||||
/// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
|
||||
/// that are stored in the first nine dwords of a Vertex Buffer. These
|
||||
/// implicit parameters are represented by pseudo instructions, which are
|
||||
/// lowered to VTX_READ instructions by this function.
|
||||
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineRegisterInfo & MRI, unsigned dword_offset) const;
|
||||
|
||||
/// LowerROTL - Lower ROTL opcode to BITALIGN
|
||||
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm;
|
||||
|
||||
#endif // R600ISELLOWERING_H
|
|
@ -1,105 +0,0 @@
|
|||
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 Implementation of TargetInstrInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600InstrInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDILSubtarget.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "AMDGPUGenDFAPacketizer.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
|
||||
: AMDGPUInstrInfo(tm),
|
||||
RI(tm, *this)
|
||||
{ }
|
||||
|
||||
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
|
||||
{
|
||||
return RI;
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isTrig(const MachineInstr &MI) const
|
||||
{
|
||||
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isVector(const MachineInstr &MI) const
|
||||
{
|
||||
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
|
||||
}
|
||||
|
||||
void
|
||||
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const
|
||||
{
|
||||
|
||||
unsigned subRegMap[4] = {AMDGPU::sel_x, AMDGPU::sel_y,
|
||||
AMDGPU::sel_z, AMDGPU::sel_w};
|
||||
|
||||
if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
|
||||
&& AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
|
||||
.addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
|
||||
.addReg(RI.getSubReg(SrcReg, subRegMap[i]))
|
||||
.addReg(DestReg, RegState::Define | RegState::Implicit);
|
||||
}
|
||||
} else {
|
||||
|
||||
/* We can't copy vec4 registers */
|
||||
assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
|
||||
&& !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
|
||||
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
|
||||
unsigned DstReg, int64_t Imm) const
|
||||
{
|
||||
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
|
||||
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
|
||||
MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
|
||||
MachineInstrBuilder(MI).addImm(Imm);
|
||||
|
||||
return MI;
|
||||
}
|
||||
|
||||
unsigned R600InstrInfo::getIEQOpcode() const
|
||||
{
|
||||
return AMDGPU::SETE_INT;
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isMov(unsigned Opcode) const
|
||||
{
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::MOV:
|
||||
case AMDGPU::MOV_IMM_F32:
|
||||
case AMDGPU::MOV_IMM_I32:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const
|
||||
{
|
||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
||||
return TM->getSubtarget<AMDILSubtarget>().createDFAPacketizer(II);
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface definition for R600InstrInfo
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600INSTRUCTIONINFO_H_
|
||||
#define R600INSTRUCTIONINFO_H_
|
||||
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILInstrInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class DFAPacketizer;
|
||||
class ScheduleDAG;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class MachineInstrBuilder;
|
||||
|
||||
class R600InstrInfo : public AMDGPUInstrInfo {
|
||||
private:
|
||||
const R600RegisterInfo RI;
|
||||
|
||||
public:
|
||||
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
|
||||
|
||||
const R600RegisterInfo &getRegisterInfo() const;
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const;
|
||||
|
||||
bool isTrig(const MachineInstr &MI) const;
|
||||
|
||||
/// isVector - Vector instructions are instructions that must fill all
|
||||
/// instruction slots within an instruction group.
|
||||
bool isVector(const MachineInstr &MI) const;
|
||||
|
||||
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const;
|
||||
|
||||
virtual unsigned getIEQOpcode() const;
|
||||
virtual bool isMov(unsigned Opcode) const;
|
||||
|
||||
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
namespace R600_InstFlag {
|
||||
enum TIF {
|
||||
TRANS_ONLY = (1 << 0),
|
||||
TEX = (1 << 1),
|
||||
REDUCTION = (1 << 2),
|
||||
FC = (1 << 3),
|
||||
TRIG = (1 << 4),
|
||||
OP3 = (1 << 5),
|
||||
VECTOR = (1 << 6)
|
||||
};
|
||||
}
|
||||
|
||||
#endif // R600INSTRINFO_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -1,16 +0,0 @@
|
|||
//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 Intrinsic Definitions
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "R600", isTarget = 1 in {
|
||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
}
|
|
@ -1,462 +0,0 @@
|
|||
//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass lowers kernel function arguments to loads from the vertex buffer.
|
||||
//
|
||||
// Kernel arguemnts are stored in the vertex buffer at an offset of 9 dwords,
|
||||
// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from
|
||||
// VTX_BUFFER[10], etc.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Metadata.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Support/IRBuilder.h"
|
||||
#include "llvm/Support/TypeBuilder.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
#define CONSTANT_CACHE_SIZE_DW 127
|
||||
|
||||
class R600KernelParameters : public FunctionPass {
|
||||
const TargetData *TD;
|
||||
LLVMContext* Context;
|
||||
Module *Mod;
|
||||
|
||||
struct Param {
|
||||
Param() : Val(NULL), PtrVal(NULL), OffsetInDW(0), SizeInDW(0),
|
||||
IsIndirect(true), SpecialID(0) {}
|
||||
|
||||
Value* Val;
|
||||
Value* PtrVal;
|
||||
int OffsetInDW;
|
||||
int SizeInDW;
|
||||
|
||||
bool IsIndirect;
|
||||
|
||||
std::string SpecialType;
|
||||
int SpecialID;
|
||||
|
||||
int End() { return OffsetInDW + SizeInDW; }
|
||||
// The first 9 dwords are reserved for the grid sizes.
|
||||
int getRatOffset() { return 9 + OffsetInDW; }
|
||||
};
|
||||
|
||||
std::vector<Param> Params;
|
||||
|
||||
bool IsOpenCLKernel(const Function *Fun);
|
||||
int getLastSpecialID(const std::string& TypeName);
|
||||
|
||||
int getListSize();
|
||||
void AddParam(Argument *Arg);
|
||||
int CalculateArgumentSize(Argument *Arg);
|
||||
void RunAna(Function *Fun);
|
||||
void Replace(Function *Fun);
|
||||
bool IsIndirect(Value *Val, std::set<Value*> &Visited);
|
||||
void Propagate(Function* Fun);
|
||||
void Propagate(Value *V, const Twine &Name, bool IsIndirect = true);
|
||||
Value* ConstantRead(Function *Fun, Param &P);
|
||||
Value* handleSpecial(Function *Fun, Param &P);
|
||||
bool IsSpecialType(Type *T);
|
||||
std::string getSpecialTypeName(Type *T);
|
||||
public:
|
||||
static char ID;
|
||||
R600KernelParameters() : FunctionPass(ID) {}
|
||||
R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {}
|
||||
bool runOnFunction (Function &F);
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
const char *getPassName() const;
|
||||
bool doInitialization(Module &M);
|
||||
bool doFinalization(Module &M);
|
||||
};
|
||||
|
||||
char R600KernelParameters::ID = 0;
|
||||
|
||||
static RegisterPass<R600KernelParameters> X("kerparam",
|
||||
"OpenCL Kernel Parameter conversion", false, false);
|
||||
|
||||
bool R600KernelParameters::IsOpenCLKernel(const Function* Fun) {
|
||||
Module *Mod = const_cast<Function*>(Fun)->getParent();
|
||||
NamedMDNode * MD = Mod->getOrInsertNamedMetadata("opencl.kernels");
|
||||
|
||||
if (!MD or !MD->getNumOperands()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < int(MD->getNumOperands()); i++) {
|
||||
if (!MD->getOperand(i) or !MD->getOperand(i)->getOperand(0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(MD->getOperand(i)->getNumOperands() == 1);
|
||||
|
||||
if (MD->getOperand(i)->getOperand(0)->getName() == Fun->getName()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int R600KernelParameters::getLastSpecialID(const std::string &TypeName) {
|
||||
int LastID = -1;
|
||||
|
||||
for (std::vector<Param>::iterator i = Params.begin(); i != Params.end(); i++) {
|
||||
if (i->SpecialType == TypeName) {
|
||||
LastID = i->SpecialID;
|
||||
}
|
||||
}
|
||||
|
||||
return LastID;
|
||||
}
|
||||
|
||||
int R600KernelParameters::getListSize() {
|
||||
if (Params.size() == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return Params.back().End();
|
||||
}
|
||||
|
||||
/// IsIndirect - Decide whether the byval argument Val requires indirect
/// (PARAM_I) addressing.  Visited guards the recursion against cycles in
/// the use graph.  Currently always returns true (direct parameters are
/// unsupported); the disabled analysis below is kept for reference.
bool R600KernelParameters::IsIndirect(Value *Val, std::set<Value*> &Visited) {
  //XXX Direct parameters are not supported yet, so return true here.
  return true;
#if 0
  if (isa<LoadInst>(Val)) {
    return false;
  }

  if (isa<IntegerType>(Val->getType())) {
    assert(0 and "Internal error");
    return false;
  }

  if (Visited.count(Val)) {
    return false;
  }

  Visited.insert(Val);

  // FIX: the disabled code spelled the class 'getElementPtrInst' (wrong
  // case) and compared an undeclared 'i' instead of 'I' in the use loop
  // below; both are corrected so this compiles if ever re-enabled.
  if (isa<GetElementPtrInst>(Val)) {
    GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(Val);
    GetElementPtrInst::op_iterator I = GEP->op_begin();

    // Any non-constant GEP index forces indirect addressing.
    for (++I; I != GEP->op_end(); ++I) {
      if (!isa<Constant>(*I)) {
        return true;
      }
    }
  }

  for (Value::use_iterator I = Val->use_begin(); I != Val->use_end(); ++I) {
    Value* V2 = dyn_cast<Value>(*I);

    if (V2) {
      if (IsIndirect(V2, Visited)) {
        return true;
      }
    }
  }

  return false;
#endif
}
|
||||
|
||||
/// AddParam - Append a Param record for Arg, computing its dword offset
/// (immediately after all previously recorded parameters) and its store
/// size.  byval pointer arguments additionally get an indirect-addressing
/// analysis; everything else keeps the default IsIndirect == true.
void R600KernelParameters::AddParam(Argument *Arg) {
  Param NewParam;

  NewParam.Val = dyn_cast<Value>(Arg);
  NewParam.SizeInDW = CalculateArgumentSize(Arg);
  NewParam.OffsetInDW = getListSize();

  const bool ByValPointer =
      isa<PointerType>(Arg->getType()) and Arg->hasByValAttr();
  if (ByValPointer) {
    std::set<Value*> Visited;
    NewParam.IsIndirect = IsIndirect(NewParam.Val, Visited);
  }

  Params.push_back(NewParam);
}
|
||||
|
||||
/// CalculateArgumentSize - Store size of Arg in dwords, rounded up.  For
/// byval pointers the pointee type's size is measured, since the data
/// itself lives in the parameter buffer.
int R600KernelParameters::CalculateArgumentSize(Argument *Arg) {
  Type* ArgTy = Arg->getType();

  if (Arg->hasByValAttr() and dyn_cast<PointerType>(ArgTy)) {
    ArgTy = dyn_cast<PointerType>(ArgTy)->getElementType();
  }

  // Round the byte size up to whole dwords; zero-sized arguments are a
  // front-end bug.
  const int SizeInDW = (TD->getTypeStoreSize(ArgTy) + 3) / 4;
  assert(SizeInDW);

  return SizeInDW;
}
|
||||
|
||||
|
||||
/// RunAna - Analysis phase: record a Param entry for every formal argument
/// of the kernel Fun, in declaration order.
void R600KernelParameters::RunAna(Function* Fun) {
  assert(IsOpenCLKernel(Fun));

  for (Function::arg_iterator Arg = Fun->arg_begin(), E = Fun->arg_end();
       Arg != E; ++Arg) {
    AddParam(Arg);
  }
}
|
||||
|
||||
/// Replace - Rewrite phase: for each recorded argument, build a
/// replacement value (a resource ID for special types such as images and
/// samplers, otherwise a read from the vertex buffer) and redirect all
/// uses of the original argument to it.  A NULL replacement (unused
/// argument, or unrecognized special type) leaves the argument untouched.
void R600KernelParameters::Replace(Function* Fun) {
  for (std::vector<Param>::iterator It = Params.begin(), E = Params.end();
       It != E; ++It) {
    Value *Replacement = IsSpecialType(It->Val->getType())
                             ? handleSpecial(Fun, *It)
                             : ConstantRead(Fun, *It);

    if (Replacement) {
      It->Val->replaceAllUsesWith(Replacement);
    }
  }
}
|
||||
|
||||
/// Propagate - Push the parameter address space through every replacement
/// pointer created for a byval argument (PtrVal is only set for those).
void R600KernelParameters::Propagate(Function* Fun) {
  for (std::vector<Param>::iterator It = Params.begin(), E = Params.end();
       It != E; ++It) {
    if (!It->PtrVal) {
      continue;
    }
    Propagate(It->PtrVal, It->Val->getName(), It->IsIndirect);
  }
}
|
||||
|
||||
/// Propagate - Recursively rewrite the GEPs and loads reachable from V so
/// they operate in the parameter address space: PARAM_I for indirect
/// access, PARAM_D for direct.  A GEP in the wrong address space is
/// recreated on a bitcast of its pointer operand; a load in the wrong
/// address space is recreated on a bitcast pointer.  Recursion stops once
/// a load has been handled.
void R600KernelParameters::Propagate(Value* V, const Twine& Name, bool IsIndirect) {
  LoadInst* Load = dyn_cast<LoadInst>(V);
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);

  unsigned Addrspace;

  if (IsIndirect) {
    Addrspace = AMDILAS::PARAM_I_ADDRESS;
  } else {
    Addrspace = AMDILAS::PARAM_D_ADDRESS;
  }

  if (GEP and GEP->getType()->getAddressSpace() != Addrspace) {
    Value *Op = GEP->getPointerOperand();

    // Cast the pointer operand into the target address space first, if it
    // is not already there.
    if (dyn_cast<PointerType>(Op->getType())->getAddressSpace() != Addrspace) {
      Op = new BitCastInst(Op, PointerType::get(dyn_cast<PointerType>(
           Op->getType())->getElementType(), Addrspace),
           Name, dyn_cast<Instruction>(V));
    }

    // Rebuild the GEP on the casted pointer with the original indices,
    // then splice it in place of the old one.
    std::vector<Value*> Params(GEP->idx_begin(), GEP->idx_end());

    GetElementPtrInst* GEP2 = GetElementPtrInst::Create(Op, Params, Name,
        dyn_cast<Instruction>(V));
    GEP2->setIsInBounds(GEP->isInBounds());
    V = dyn_cast<Value>(GEP2);
    GEP->replaceAllUsesWith(GEP2);
    GEP->eraseFromParent();
    // V is no longer the (now erased) GEP, so it cannot be a load either.
    Load = NULL;
  }

  if (Load) {
    ///normally at this point we have the right address space
    if (Load->getPointerAddressSpace() != Addrspace) {
      Value *OrigPtr = Load->getPointerOperand();
      PointerType *OrigPtrType = dyn_cast<PointerType>(OrigPtr->getType());

      Type* NewPtrType = PointerType::get(OrigPtrType->getElementType(),
          Addrspace);

      Value* NewPtr = OrigPtr;

      // Only insert a cast when the pointer type actually differs.
      if (OrigPtr->getType() != NewPtrType) {
        NewPtr = new BitCastInst(OrigPtr, NewPtrType, "prop_cast", Load);
      }

      Value* new_Load = new LoadInst(NewPtr, Name, Load);
      Load->replaceAllUsesWith(new_Load);
      Load->eraseFromParent();
    }

    // Loads terminate the propagation walk.
    return;
  }

  // Snapshot the users before recursing: the rewrites above may mutate
  // the use list while we iterate.
  std::vector<User*> Users(V->use_begin(), V->use_end());

  for (int i = 0; i < int(Users.size()); i++) {
    Value* V2 = dyn_cast<Value>(Users[i]);

    if (V2) {
      Propagate(V2, Name, IsIndirect);
    }
  }
}
|
||||
|
||||
/// ConstantRead - Build IR at the top of Fun's entry block that reads the
/// argument described by P out of the parameter address space.  For byval
/// pointer arguments, returns (and records in P.PtrVal) a pointer into the
/// parameter buffer; for everything else, returns a load of the argument
/// value.  Returns NULL for arguments that have no uses.
Value* R600KernelParameters::ConstantRead(Function *Fun, Param &P) {
  assert(Fun->front().begin() != Fun->front().end());

  // All new instructions go before the first instruction of the entry block.
  Instruction *FirstInst = Fun->front().begin();
  IRBuilder <> Builder (FirstInst);
  // NOTE(review): the original comment said "first 3 dwords are reserved
  // for the dimmension info", but getRatOffset() skips 9 dwords (the grid
  // sizes, per Param::getRatOffset) — confirm which is current.

  // Unused arguments need no replacement.
  if (!P.Val->hasNUsesOrMore(1)) {
    return NULL;
  }
  unsigned Addrspace;

  if (P.IsIndirect) {
    Addrspace = AMDILAS::PARAM_I_ADDRESS;
  } else {
    Addrspace = AMDILAS::PARAM_D_ADDRESS;
  }

  Argument *Arg = dyn_cast<Argument>(P.Val);
  Type * ArgType = P.Val->getType();
  PointerType * ArgPtrType = dyn_cast<PointerType>(P.Val->getType());

  if (ArgPtrType and Arg->hasByValAttr()) {
    // byval aggregate: hand back a pointer into the parameter buffer.
    // Addressing is built from a null i32* in the parameter address space
    // plus the argument's dword offset, then cast to the pointee type.
    Value* ParamAddrSpacePtr = ConstantPointerNull::get(
                                    PointerType::get(Type::getInt32Ty(*Context),
                                    Addrspace));
    Value* ParamPtr = GetElementPtrInst::Create(ParamAddrSpacePtr,
                                    ConstantInt::get(Type::getInt32Ty(*Context),
                                    P.getRatOffset()), Arg->getName(),
                                    FirstInst);
    ParamPtr = new BitCastInst(ParamPtr,
                                PointerType::get(ArgPtrType->getElementType(),
                                                 Addrspace),
                                Arg->getName(), FirstInst);
    // Remember the pointer so Propagate() can fix up dependent GEPs/loads.
    P.PtrVal = ParamPtr;
    return ParamPtr;
  } else {
    // Scalar/vector argument: GEP to the argument's slot (indexed in units
    // of the argument type) and load the value.
    Value *ParamAddrSpacePtr = ConstantPointerNull::get(PointerType::get(
                                                        ArgType, Addrspace));

    Value *ParamPtr = Builder.CreateGEP(ParamAddrSpacePtr,
             ConstantInt::get(Type::getInt32Ty(*Context), P.getRatOffset()),
             Arg->getName());

    Value *Param_Value = Builder.CreateLoad(ParamPtr, Arg->getName());

    return Param_Value;
  }
}
|
||||
|
||||
/// handleSpecial - Replace an OpenCL special-type argument (image2d_t,
/// image3d_t, sampler_t) with a resource ID constant cast to the argument
/// type.  IDs are allocated sequentially per resource family; images share
/// one ID space across 2D and 3D.  Returns NULL for unrecognized special
/// types (the argument is left untouched).
Value* R600KernelParameters::handleSpecial(Function* Fun, Param& P) {
  std::string Name = getSpecialTypeName(P.Val->getType());
  int ID;

  // Callers only invoke this after IsSpecialType() succeeded.
  assert(!Name.empty());

  if (Name == "image2d_t" or Name == "image3d_t") {
    // Images share one ID counter across both dimensionalities.
    int LastID = std::max(getLastSpecialID("image2d_t"),
                     getLastSpecialID("image3d_t"));

    if (LastID == -1) {
      ID = 2; ///ID0 and ID1 are used internally by the driver
    } else {
      ID = LastID + 1;
    }
  } else if (Name == "sampler_t") {
    int LastID = getLastSpecialID("sampler_t");

    if (LastID == -1) {
      ID = 0;
    } else {
      ID = LastID + 1;
    }
  } else {
    ///TODO: give some error message
    return NULL;
  }

  // Record the allocation so subsequent getLastSpecialID() calls see it.
  P.SpecialType = Name;
  P.SpecialID = ID;

  Instruction *FirstInst = Fun->front().begin();

  // Materialize the resource ID as a pointer-typed constant of the
  // argument's own type.
  return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context),
                                           P.SpecialID), P.Val->getType(),
                                           "resourceID", FirstInst);
}
|
||||
|
||||
|
||||
/// IsSpecialType - True when T is a pointer to an OpenCL builtin type
/// (image/sampler), as recognized by getSpecialTypeName().
bool R600KernelParameters::IsSpecialType(Type* T) {
  return getSpecialTypeName(T).empty() == false;
}
|
||||
|
||||
/// getSpecialTypeName - If T is a pointer to a struct named
/// "struct.opencl_builtin_type_<name>", return <name>; otherwise return "".
std::string R600KernelParameters::getSpecialTypeName(Type* T) {
  PointerType *PtrTy = dyn_cast<PointerType>(T);

  if (!PtrTy) {
    return "";
  }

  StructType *StructTy = dyn_cast<StructType>(PtrTy->getElementType());

  if (!StructTy) {
    return "";
  }

  const std::string Prefix = "struct.opencl_builtin_type_";
  const std::string Name = StructTy->getName().str();

  // Strip the prefix when present; anything else is not a special type.
  if (Name.substr(0, Prefix.length()) == Prefix) {
    return Name.substr(Prefix.length(), Name.length());
  }

  return "";
}
|
||||
|
||||
|
||||
/// runOnFunction - Pass entry point.  Non-kernel functions are skipped;
/// for OpenCL kernels the three phases run in order: analyze the
/// arguments, replace their uses, then propagate the parameter address
/// space.  Always reports no change, matching the original behavior.
bool R600KernelParameters::runOnFunction (Function &F) {
  if (IsOpenCLKernel(&F)) {
    RunAna(&F);
    Replace(&F);
    Propagate(&F);
  }

  return false;
}
|
||||
|
||||
/// getAnalysisUsage - This pass preserves all analyses.
void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  FunctionPass::getAnalysisUsage(AU);
}
|
||||
|
||||
/// getPassName - Human-readable name shown by -debug-pass and friends.
const char *R600KernelParameters::getPassName() const {
  return "OpenCL Kernel parameter conversion to memory";
}
|
||||
|
||||
/// doInitialization - Cache the module and its LLVMContext for use by the
/// per-function phases.  Does not modify the module.
bool R600KernelParameters::doInitialization(Module &M) {
  Mod = &M;
  Context = &M.getContext();

  return false;
}
|
||||
|
||||
/// doFinalization - Nothing to tear down; the module is not modified here.
bool R600KernelParameters::doFinalization(Module &M) {
  return false;
}
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
/// createR600KernelParametersPass - Factory used by the target to create
/// this pass; TD provides argument store-size information.
FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) {
  return new R600KernelParameters(TD);
}
|
|
@ -1,16 +0,0 @@
|
|||
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Construct with an empty ReservedRegs list.  MF is accepted to satisfy
// MachineFunction::getInfo's construction interface but is not used.
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
  : MachineFunctionInfo()
  { }
|
|
@ -1,33 +0,0 @@
|
|||
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600MachineFunctionInfo is used for keeping track of which registers have
|
||||
// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600MACHINEFUNCTIONINFO_H
|
||||
#define R600MACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// R600MachineFunctionInfo - Per-function R600 state: tracks registers
/// reserved via the llvm.AMDGPU.reserve.reg intrinsic (see file header).
class R600MachineFunctionInfo : public MachineFunctionInfo {

public:
  R600MachineFunctionInfo(const MachineFunction &MF);
  // Registers reserved for this function; consumed by
  // R600RegisterInfo::getReservedRegs().
  std::vector<unsigned> ReservedRegs;

};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif //R600MACHINEFUNCTIONINFO_H
|
|
@ -1,88 +0,0 @@
|
|||
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The file contains the R600 implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Construct the R600 register info, forwarding to the common AMDGPU base
// and keeping references to the target machine and instruction info.
R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
    const TargetInstrInfo &tii)
: AMDGPURegisterInfo(tm, tii),
  TM(tm),
  TII(tii)
  { }
|
||||
|
||||
/// getReservedRegs - Registers the allocator must not touch: the fixed
/// constant/special sources, the whole constant register file, and any
/// registers reserved for MF via llvm.AMDGPU.reserve.reg.
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
{
  BitVector Reserved(getNumRegs());
  const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();

  // Hardware constant/special sources are never allocatable.
  Reserved.set(AMDGPU::ZERO);
  Reserved.set(AMDGPU::HALF);
  Reserved.set(AMDGPU::ONE);
  Reserved.set(AMDGPU::ONE_INT);
  Reserved.set(AMDGPU::NEG_HALF);
  Reserved.set(AMDGPU::NEG_ONE);
  Reserved.set(AMDGPU::PV_X);
  Reserved.set(AMDGPU::ALU_LITERAL_X);

  // The entire constant register file is read-only.
  for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
                        E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
    Reserved.set(*I);
  }

  // Registers claimed by llvm.AMDGPU.reserve.reg for this function.
  for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
                                    E = MFI->ReservedRegs.end(); I != E; ++I) {
    Reserved.set(*I);
  }

  return Reserved;
}
|
||||
|
||||
/// getISARegClass - Map an AMDIL register class to the equivalent R600
/// class; classes with no mapping are returned unchanged.
const TargetRegisterClass *
R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
{
  switch (rc->getID()) {
  // Both AMDIL 32-bit classes collapse onto the single R600 32-bit class.
  case AMDGPU::GPRF32RegClassID:
  case AMDGPU::GPRI32RegClassID:
    return &AMDGPU::R600_Reg32RegClass;
  default: return rc;
  }
}
|
||||
|
||||
/// getHWRegChan - Hardware channel encoding (x/y/z/w) for reg.  The fixed
/// constant/special sources always encode channel 0; everything else is
/// looked up in the generated table (R600HwRegInfo.include).
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
{
  switch(reg) {
  case AMDGPU::ZERO:
  case AMDGPU::ONE:
  case AMDGPU::ONE_INT:
  case AMDGPU::NEG_ONE:
  case AMDGPU::HALF:
  case AMDGPU::NEG_HALF:
  case AMDGPU::ALU_LITERAL_X:
    return 0;
  default: return getHWRegChanGen(reg);
  }
}
|
||||
|
||||
/// getCFGStructurizerRegClass - Register class the CFG structurizer should
/// use for values of type VT; R600 uses the temp 32-bit class throughout.
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
                                                                   MVT VT) const
{
  switch(VT.SimpleTy) {
  default:
  case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
  }
}
|
||||
#include "R600HwRegInfo.include"
|
|
@ -1,54 +0,0 @@
|
|||
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface definition for R600RegisterInfo
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600REGISTERINFO_H_
|
||||
#define R600REGISTERINFO_H_
|
||||
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDILRegisterInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class R600TargetMachine;
|
||||
class TargetInstrInfo;
|
||||
|
||||
/// R600RegisterInfo - R600 implementation of TargetRegisterInfo (via the
/// common AMDGPU base).  See R600RegisterInfo.cpp for method details.
struct R600RegisterInfo : public AMDGPURegisterInfo
{
  AMDGPUTargetMachine &TM;     // Owning target machine.
  const TargetInstrInfo &TII;  // Instruction info for this target.

  R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);

  /// getReservedRegs - Fixed sources, the constant register file, and any
  /// registers claimed via llvm.AMDGPU.reserve.reg.
  virtual BitVector getReservedRegs(const MachineFunction &MF) const;

  /// getISARegClass - rc is an AMDIL reg class. This function returns the
  /// R600 reg class that is equivalent to the given AMDIL reg class.
  virtual const TargetRegisterClass * getISARegClass(
    const TargetRegisterClass * rc) const;

  /// getHWRegChan - get the HW encoding for a register's channel.
  unsigned getHWRegChan(unsigned reg) const;

  /// getCFGStructurizerRegClass - get the register class of the specified
  /// type to use in the CFGStructurizer
  virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;

private:
  /// getHWRegChanGen - Generated function returns a register's channel
  /// encoding.
  unsigned getHWRegChanGen(unsigned reg) const;
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // R600REGISTERINFO_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -1,36 +0,0 @@
|
|||
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
|
||||
// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
|
||||
// slot has been removed.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// The five VLIW issue slots (TRANS is absent on cayman; see file header).
def ALU_X : FuncUnit;
def ALU_Y : FuncUnit;
def ALU_Z : FuncUnit;
def ALU_W : FuncUnit;
def TRANS : FuncUnit;

// Itinerary classes: AnyALU may issue to any slot, VecALU only to the
// vector slots, TransALU only to the transcendental slot.
def AnyALU : InstrItinClass;
def VecALU : InstrItinClass;
def TransALU : InstrItinClass;
|
||||
|
||||
// Itineraries for R600/Evergreen: one-cycle stages mapping each itinerary
// class onto its legal issue slots.
def R600_EG_Itin : ProcessorItineraries <
  [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
  [],
  [
    InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
    // FIX: VecALU previously listed ALU_X twice and omitted ALU_Z, so the
    // scheduler never issued vector-only ops to the Z slot.
    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
    InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
    InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
  ]
>;
|
|
@ -1,117 +0,0 @@
|
|||
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass maps the pseudo interpolation registers to the correct physical
|
||||
// registers. Prior to executing a fragment shader, the GPU loads interpolation
|
||||
// parameters into physical registers. The specific physical register that each
|
||||
// interpolation parameter ends up in depends on the type of the interpolation
|
||||
// parameter as well as how many interpolation parameters are used by the
|
||||
// shader.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "AMDIL.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SIAssignInterpRegsPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
TargetMachine &TM;
|
||||
|
||||
public:
|
||||
SIAssignInterpRegsPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TM(tm) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const { return "SI Assign intrpolation registers"; }
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char SIAssignInterpRegsPass::ID = 0;
|
||||
|
||||
#define INTERP_VALUES 16
|
||||
|
||||
// interp_info - One entry per hardware interpolation value: whether any of
// its pseudo registers is used, the pseudo registers themselves, and how
// many of the three slots are valid.
struct interp_info {
  bool enabled;
  unsigned regs[3];
  unsigned reg_count;
};
|
||||
|
||||
|
||||
/// createSIAssignInterpRegsPass - Factory used by the SI target pipeline.
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
  return new SIAssignInterpRegsPass(tm);
}
|
||||
|
||||
/// runOnMachineFunction - Two passes over the interpolation table: first
/// mark which interpolation values are actually used, then assign them
/// consecutive VGPRs, record them in spi_ps_input_addr, and rewrite the
/// pseudo registers to fresh virtual registers that are live-in from the
/// assigned physical VGPRs.
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
{

  // Table order defines both the spi_ps_input_addr bit for each value and
  // the order in which VGPRs are handed out.
  struct interp_info InterpUse[INTERP_VALUES] = {
    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
    {false, {AMDGPU::POS_X_FLOAT}, 1},
    {false, {AMDGPU::POS_Y_FLOAT}, 1},
    {false, {AMDGPU::POS_Z_FLOAT}, 1},
    {false, {AMDGPU::POS_W_FLOAT}, 1},
    {false, {AMDGPU::FRONT_FACE}, 1},
    {false, {AMDGPU::ANCILLARY}, 1},
    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
    {false, {AMDGPU::POS_FIXED_PT}, 1}
  };

  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  /* First pass, mark the interpolation values that are used. */
  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
                                                               reg_idx++) {
      // FIX: accumulate with |= -- the original assignment meant a used
      // register was forgotten whenever a later register of the same
      // interpolation value happened to be unused.
      InterpUse[interp_idx].enabled = InterpUse[interp_idx].enabled ||
                        !MRI.use_empty(InterpUse[interp_idx].regs[reg_idx]);
    }
  }

  unsigned used_vgprs = 0;

  /* Second pass, replace with VGPRs. */
  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
    if (!InterpUse[interp_idx].enabled) {
      continue;
    }
    // Tell the hardware (via SPI_PS_INPUT_ADDR) to load this value.
    MFI->spi_ps_input_addr |= (1 << interp_idx);

    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
                                                  reg_idx++, used_vgprs++) {
      // VGPRs are assigned consecutively in table order; each pseudo reg
      // becomes a virtual register live-in from its physical VGPR.
      unsigned new_reg = AMDGPU::VReg_32RegClass.getRegister(used_vgprs);
      unsigned virt_reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
      AMDGPU::utilAddLiveIn(&MF, MRI, TM.getInstrInfo(), new_reg, virt_reg);
    }
  }

  return false;
}
|
|
@ -1,321 +0,0 @@
|
|||
//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The SI code emitter produces machine code that can be executed directly on
|
||||
// the GPU device.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "AMDILCodeEmitter.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define LITERAL_REG 255
|
||||
#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
/// SICodeEmitter - Emits raw SI machine code to a stream so it can run
/// directly on the GPU (see file header).  Runs as a MachineFunctionPass
/// and implements the AMDILCodeEmitter operand-encoding callbacks used by
/// the generated encoder.
class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {

private:
  static char ID;
  formatted_raw_ostream &_OS;   // Destination stream for emitted bytes.
  const TargetMachine *TM;      // Set in runOnMachineFunction().
  // emitState - Emit the register-count/SPI header before the code.
  void emitState(MachineFunction & MF);
  // emitInstr - Encode and emit a single machine instruction.
  void emitInstr(MachineInstr &MI);

  // outputBytes - Write the low 'bytes' bytes of value to _OS.
  void outputBytes(uint64_t value, unsigned bytes);
  // GPRAlign - Shared helper for the GPR*AlignEncode callbacks.
  unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
      const;

public:
  SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
      _OS(OS), TM(NULL) { }
  const char *getPassName() const { return "SI Code Emitter"; }
  bool runOnMachineFunction(MachineFunction &MF);

  /// getMachineOpValue - Return the encoding for MO
  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
                                     const MachineOperand &MO) const;

  /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
  virtual unsigned GPR4AlignEncode(const MachineInstr &MI, unsigned OpNo)
                                                                      const;

  /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
  virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
                                                                      const;
  /// i32LiteralEncode - Encode an i32 literal this is used as an operand
  /// for an instruction in place of a register.
  virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
                                                                      const;
  /// SMRDmemriEncode - Encoding for SMRD indexed loads
  virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
                                                                      const;

  /// VOPPostEncode - Post-Encoder method for VOP instructions
  virtual uint64_t VOPPostEncode(const MachineInstr &MI,
                                 uint64_t Value) const;
};
|
||||
}
|
||||
|
||||
char SICodeEmitter::ID = 0;
|
||||
|
||||
/// createSICodeEmitterPass - Factory; OS receives the emitted machine code.
FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
  return new SICodeEmitter(OS);
}
|
||||
|
||||
/// emitState - Scan every operand of every instruction to find the highest
/// SGPR and VGPR indices in use, then emit the program header: SGPR count,
/// VGPR count, and the SPI_PS_INPUT_ADDR value, 4 bytes each.
void SICodeEmitter::emitState(MachineFunction & MF)
{
  unsigned maxSGPR = 0;
  unsigned maxVGPR = 0;
  bool VCCUsed = false;
  const SIRegisterInfo * RI =
      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;
      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        MachineOperand & MO = MI.getOperand(op_idx);
        unsigned maxUsed;
        unsigned width = 0;
        bool isSGPR = false;
        unsigned reg;
        unsigned hwReg;
        if (!MO.isReg()) {
          continue;
        }
        reg = MO.getReg();
        // VCC is accounted for separately (two extra SGPRs at the end).
        if (reg == AMDGPU::VCC) {
          VCCUsed = true;
          continue;
        }
        // Classify the register: bank (SGPR/VGPR) and tuple width in
        // 32-bit units.
        if (AMDGPU::SReg_32RegClass.contains(reg)) {
          isSGPR = true;
          width = 1;
        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
          isSGPR = false;
          width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
          isSGPR = true;
          width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
          isSGPR = false;
          width = 2;
        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
          isSGPR = true;
          width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
          isSGPR = false;
          width = 4;
        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
          isSGPR = true;
          width = 8;
        } else {
          // FIX: the original assert("!Unknown register class") asserted
          // on a string literal, which is always true and never fired.
          assert(!"Unknown register class");
        }
        hwReg = RI->getEncodingValue(reg);
        // Index of the last 32-bit register covered by this operand.
        // NOTE(review): assumes getEncodingValue() numbers tuple registers
        // in width-sized units -- confirm against the register encodings.
        maxUsed = ((hwReg + 1) * width) - 1;
        if (isSGPR) {
          maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
        } else {
          maxVGPR = maxUsed > maxVGPR ? maxUsed : maxVGPR;
        }
      }
    }
  }
  if (VCCUsed) {
    maxSGPR += 2;
  }
  outputBytes(maxSGPR + 1, 4);
  outputBytes(maxVGPR + 1, 4);
  outputBytes(MFI->spi_ps_input_addr, 4);
}
|
||||
|
||||
/// runOnMachineFunction - Emit the state header, then every instruction in
/// layout order (KILL and RETURN pseudos are skipped), and finally an
/// explicit S_ENDPGM terminator.
bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
{
  TM = &MF.getTarget();
  const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();

  // Debug aid: dump the machine function when -dump-code is in effect.
  if (STM.dumpCode()) {
    MF.dump();
  }

  emitState(MF);

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;
      // KILL/RETURN are pseudo instructions with no hardware encoding.
      if (MI.getOpcode() != AMDGPU::KILL && MI.getOpcode() != AMDGPU::RETURN) {
        emitInstr(MI);
      }
    }
  }
  // Emit S_END_PGM
  MachineInstr * End = BuildMI(MF, DebugLoc(),
                               TM->getInstrInfo()->get(AMDGPU::S_ENDPGM));
  emitInstr(*End);
  return false;
}
|
||||
|
||||
/// emitInstr - Encode MI via the generated encoder and write the encoding
/// to the output stream, using the instruction's real encoding width.
/// Aborts on instructions the encoder does not support.
void SICodeEmitter::emitInstr(MachineInstr &MI)
{
  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());

  uint64_t hwInst = getBinaryCodeForInstr(MI);

  // The generated encoder returns all-ones in the low 32 bits for opcodes
  // it cannot encode.
  if ((hwInst & 0xffffffff) == 0xffffffff) {
    fprintf(stderr, "Unsupported Instruction: \n");
    MI.dump();
    abort();
  }

  unsigned bytes = SII->getEncodingBytes(MI);
  outputBytes(hwInst, bytes);
}
|
||||
|
||||
/// getMachineOpValue - Return the hardware encoding for MO: the register
/// encoding value, an immediate verbatim, or for FP immediates the literal
/// marker (LITERAL_REG) with the 32-bit literal placed in the high dword.
uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                          const MachineOperand &MO) const
{
  const SIRegisterInfo * RI =
      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());

  switch(MO.getType()) {
  case MachineOperand::MO_Register:
    return RI->getEncodingValue(MO.getReg());

  case MachineOperand::MO_Immediate:
    return MO.getImm();

  case MachineOperand::MO_FPImmediate:
    // XXX: Not all instructions can use inline literals
    // XXX: We should make sure this is a 32-bit constant
    return LITERAL_REG | (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
  default:
    llvm_unreachable("Encoding of this operand type is not supported yet.");
    break;
  }
}
|
||||
|
||||
/// GPRAlign - Encode the register operand OpNo as its encoding value
/// shifted right by 'shift', i.e. the index of the aligned register tuple
/// it starts (shared by GPR2/GPR4AlignEncode).
unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
    unsigned shift) const
{
  const SIRegisterInfo * RI =
      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
  unsigned regCode = RI->getEncodingValue(MI.getOperand(OpNo).getReg());
  return regCode >> shift;
}
|
||||
|
||||
unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const
|
||||
{
|
||||
return GPRAlign(MI, OpNo, 2);
|
||||
}
|
||||
|
||||
unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const
|
||||
{
|
||||
return GPRAlign(MI, OpNo, 1);
|
||||
}
|
||||
|
||||
uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const
|
||||
{
|
||||
return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
|
||||
}
|
||||
|
||||
#define SMRD_OFFSET_MASK 0xff
|
||||
#define SMRD_IMM_SHIFT 8
|
||||
#define SMRD_SBASE_MASK 0x3f
|
||||
#define SMRD_SBASE_SHIFT 9
|
||||
/// SMRDmemriEncode - This function is responsibe for encoding the offset
|
||||
/// and the base ptr for SMRD instructions it should return a bit string in
|
||||
/// this format:
|
||||
///
|
||||
/// OFFSET = bits{7-0}
|
||||
/// IMM = bits{8}
|
||||
/// SBASE = bits{14-9}
|
||||
///
|
||||
uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const
|
||||
{
|
||||
uint32_t encoding;
|
||||
|
||||
const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
|
||||
|
||||
//XXX: Use this function for SMRD loads with register offsets
|
||||
assert(OffsetOp.isImm());
|
||||
|
||||
encoding =
|
||||
(getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
|
||||
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
|
||||
| ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
|
||||
;
|
||||
|
||||
return encoding;
|
||||
}
|
||||
|
||||
/// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
|
||||
/// XXX: It would be nice if we could handle this without a PostEncode function.
|
||||
uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
|
||||
uint64_t Value) const
|
||||
{
|
||||
const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
|
||||
unsigned encodingType = SII->getEncodingType(MI);
|
||||
unsigned numSrcOps;
|
||||
unsigned vgprBitOffset;
|
||||
|
||||
if (encodingType == SIInstrEncodingType::VOP3) {
|
||||
numSrcOps = 3;
|
||||
vgprBitOffset = 32;
|
||||
} else {
|
||||
numSrcOps = 1;
|
||||
vgprBitOffset = 0;
|
||||
}
|
||||
|
||||
// Add one to skip over the destination reg operand.
|
||||
for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
|
||||
if (!MI.getOperand(opIdx).isReg()) {
|
||||
continue;
|
||||
}
|
||||
unsigned reg = MI.getOperand(opIdx).getReg();
|
||||
if (AMDGPU::VReg_32RegClass.contains(reg)
|
||||
|| AMDGPU::VReg_64RegClass.contains(reg)) {
|
||||
Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
|
||||
}
|
||||
}
|
||||
return Value;
|
||||
}
|
||||
|
||||
|
||||
void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
|
||||
{
|
||||
for (unsigned i = 0; i < bytes; i++) {
|
||||
_OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
|
||||
}
|
||||
}
|
|
@ -1,224 +0,0 @@
|
|||
#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
#
|
||||
# This perl script prints to stdout .td code to be used as SIRegisterInfo.td
|
||||
# it also generates a file called SIHwRegInfo.include, which contains helper
|
||||
# functions for determining the hw encoding of registers.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
my $SGPR_COUNT = 104;
|
||||
my $VGPR_COUNT = 256;
|
||||
|
||||
my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
|
||||
my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
|
||||
|
||||
my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
|
||||
|
||||
print <<STRING;
|
||||
|
||||
let Namespace = "AMDGPU" in {
|
||||
def low : SubRegIndex;
|
||||
def high : SubRegIndex;
|
||||
|
||||
def sub0 : SubRegIndex;
|
||||
def sub1 : SubRegIndex;
|
||||
def sub2 : SubRegIndex;
|
||||
def sub3 : SubRegIndex;
|
||||
def sub4 : SubRegIndex;
|
||||
def sub5 : SubRegIndex;
|
||||
def sub6 : SubRegIndex;
|
||||
def sub7 : SubRegIndex;
|
||||
}
|
||||
|
||||
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
|
||||
let Namespace = "AMDGPU";
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [low, high];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class SI_128 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class SI_256 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
|
||||
let HWEncoding = encoding;
|
||||
}
|
||||
|
||||
class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
|
||||
|
||||
class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
|
||||
|
||||
class SGPR_64 <bits<16> num, string name, list<Register> subregs> :
|
||||
SI_64 <name, subregs, num>;
|
||||
|
||||
class VGPR_64 <bits<16> num, string name, list<Register> subregs> :
|
||||
SI_64 <name, subregs, num>;
|
||||
|
||||
class SGPR_128 <bits<16> num, string name, list<Register> subregs> :
|
||||
SI_128 <name, subregs, num>;
|
||||
|
||||
class VGPR_128 <bits<16> num, string name, list<Register> subregs> :
|
||||
SI_128 <name, subregs, num>;
|
||||
|
||||
class SGPR_256 <bits<16> num, string name, list<Register> subregs> :
|
||||
SI_256 <name, subregs, num>;
|
||||
|
||||
def VCC : SIReg<"VCC">;
|
||||
def SCC : SIReg<"SCC">;
|
||||
def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
|
||||
|
||||
def M0 : SIReg <"M0", 124>;
|
||||
|
||||
//Interpolation registers
|
||||
|
||||
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
|
||||
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
|
||||
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
|
||||
def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
|
||||
def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
|
||||
def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
|
||||
def PERSP_I_W : SIReg <"PERSP_I_W">;
|
||||
def PERSP_J_W : SIReg <"PERSP_J_W">;
|
||||
def PERSP_1_W : SIReg <"PERSP_1_W">;
|
||||
def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
|
||||
def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
|
||||
def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
|
||||
def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
|
||||
def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
|
||||
def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
|
||||
def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
|
||||
def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
|
||||
def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
|
||||
def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
|
||||
def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
|
||||
def FRONT_FACE : SIReg <"FRONT_FACE">;
|
||||
def ANCILLARY : SIReg <"ANCILLARY">;
|
||||
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
|
||||
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
|
||||
|
||||
STRING
|
||||
|
||||
#32 bit register
|
||||
|
||||
my @SGPR;
|
||||
for (my $i = 0; $i < $SGPR_COUNT; $i++) {
|
||||
print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
|
||||
$SGPR[$i] = "SGPR$i";
|
||||
}
|
||||
|
||||
my @VGPR;
|
||||
for (my $i = 0; $i < $VGPR_COUNT; $i++) {
|
||||
print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
|
||||
$VGPR[$i] = "VGPR$i";
|
||||
}
|
||||
|
||||
print <<STRING;
|
||||
|
||||
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0)
|
||||
>;
|
||||
|
||||
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
|
||||
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
|
||||
PERSP_CENTER_I, PERSP_CENTER_J,
|
||||
PERSP_CENTROID_I, PERSP_CENTROID_J,
|
||||
PERSP_I_W, PERSP_J_W, PERSP_1_W,
|
||||
LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
|
||||
LINEAR_CENTER_I, LINEAR_CENTER_J,
|
||||
LINEAR_CENTROID_I, LINEAR_CENTROID_J,
|
||||
LINE_STIPPLE_TEX_COORD,
|
||||
POS_X_FLOAT,
|
||||
POS_Y_FLOAT,
|
||||
POS_Z_FLOAT,
|
||||
POS_W_FLOAT,
|
||||
FRONT_FACE,
|
||||
ANCILLARY,
|
||||
SAMPLE_COVERAGE,
|
||||
POS_FIXED_PT
|
||||
)
|
||||
>;
|
||||
|
||||
def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add VReg_32, SReg_32)
|
||||
>;
|
||||
|
||||
def CCReg : RegisterClass<"AMDGPU", [f32], 32, (add VCC, SCC)>;
|
||||
|
||||
STRING
|
||||
|
||||
my @subregs_64 = ('low', 'high');
|
||||
my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
|
||||
my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
|
||||
|
||||
my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
|
||||
my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
|
||||
my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
|
||||
|
||||
my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
|
||||
my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
|
||||
|
||||
|
||||
my $sgpr64_list = join(',', @SGPR64);
|
||||
my $vgpr64_list = join(',', @VGPR64);
|
||||
print <<STRING;
|
||||
|
||||
def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
|
||||
(add $sgpr64_list, $vgpr64_list)
|
||||
>;
|
||||
|
||||
STRING
|
||||
|
||||
sub print_sgpr_class {
|
||||
my ($reg_width, $sub_reg_ref, @types) = @_;
|
||||
return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types);
|
||||
}
|
||||
|
||||
sub print_vgpr_class {
|
||||
my ($reg_width, $sub_reg_ref, @types) = @_;
|
||||
return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types);
|
||||
}
|
||||
|
||||
sub print_reg_class {
|
||||
my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
|
||||
my @registers;
|
||||
my $component_count = $reg_width / 32;
|
||||
|
||||
for (my $i = 0; $i < $reg_count; $i += $component_count) {
|
||||
my $reg_name = $reg_prefix . $i . '_' . $reg_width;
|
||||
my @sub_regs;
|
||||
for (my $idx = 0; $idx < $component_count; $idx++) {
|
||||
my $sub_idx = $i + $idx;
|
||||
push(@sub_regs, $reg_prefix . $sub_idx);
|
||||
}
|
||||
print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
|
||||
if ($i % 10 == 0) {
|
||||
$reg_name .= "\n";
|
||||
}
|
||||
push (@registers, $reg_name);
|
||||
}
|
||||
my $reg_list = join(', ', @registers);
|
||||
|
||||
print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
|
||||
print "}\n";
|
||||
return @registers;
|
||||
}
|
|
@ -1,195 +0,0 @@
|
|||
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
|
||||
// mostly EmitInstrWithCustomInserter().
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SIISelLowering.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
AMDGPUTargetLowering(TM),
|
||||
TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
|
||||
{
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
||||
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
|
||||
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
|
||||
addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
|
||||
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
|
||||
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::i32, Legal);
|
||||
|
||||
}
|
||||
|
||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MachineInstr * MI, MachineBasicBlock * BB) const
|
||||
{
|
||||
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
|
||||
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
||||
MachineBasicBlock::iterator I = MI;
|
||||
|
||||
if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
|
||||
AppendS_WAITCNT(MI, *BB, llvm::next(I));
|
||||
return BB;
|
||||
}
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
|
||||
case AMDGPU::CLAMP_SI:
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
// VSRC1-2 are unused, but we still need to fill all the
|
||||
// operand slots, so we just reuse the VSRC0 operand
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addImm(0) // ABS
|
||||
.addImm(1) // CLAMP
|
||||
.addImm(0) // OMOD
|
||||
.addImm(0); // NEG
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
||||
case AMDGPU::FABS_SI:
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
// VSRC1-2 are unused, but we still need to fill all the
|
||||
// operand slots, so we just reuse the VSRC0 operand
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addImm(1) // ABS
|
||||
.addImm(0) // CLAMP
|
||||
.addImm(0) // OMOD
|
||||
.addImm(0); // NEG
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_INTERP:
|
||||
LowerSI_INTERP(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_INTERP_CONST:
|
||||
LowerSI_INTERP_CONST(MI, *BB, I);
|
||||
break;
|
||||
case AMDGPU::SI_V_CNDLT:
|
||||
LowerSI_V_CNDLT(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::USE_SGPR_32:
|
||||
case AMDGPU::USE_SGPR_64:
|
||||
lowerUSE_SGPR(MI, BB->getParent(), MRI);
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
case AMDGPU::VS_LOAD_BUFFER_INDEX:
|
||||
addLiveIn(MI, BB->getParent(), MRI, TII, AMDGPU::VGPR0);
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
return BB;
|
||||
}
|
||||
|
||||
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I) const
|
||||
{
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
|
||||
MachineOperand dst = MI->getOperand(0);
|
||||
MachineOperand iReg = MI->getOperand(1);
|
||||
MachineOperand jReg = MI->getOperand(2);
|
||||
MachineOperand attr_chan = MI->getOperand(3);
|
||||
MachineOperand attr = MI->getOperand(4);
|
||||
MachineOperand params = MI->getOperand(5);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32))
|
||||
.addReg(AMDGPU::M0)
|
||||
.addOperand(params);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
|
||||
.addOperand(iReg)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
|
||||
.addOperand(dst)
|
||||
.addReg(tmp)
|
||||
.addOperand(jReg)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
|
||||
MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
|
||||
{
|
||||
MachineOperand dst = MI->getOperand(0);
|
||||
MachineOperand attr_chan = MI->getOperand(1);
|
||||
MachineOperand attr = MI->getOperand(2);
|
||||
MachineOperand params = MI->getOperand(3);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32))
|
||||
.addReg(AMDGPU::M0)
|
||||
.addOperand(params);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
|
||||
.addOperand(dst)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addReg(AMDGPU::SREG_LIT_0);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(2))
|
||||
.addOperand(MI->getOperand(3));
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
|
||||
MachineFunction * MF, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
|
||||
unsigned dstReg = MI->getOperand(0).getReg();
|
||||
int64_t newIndex = MI->getOperand(1).getImm();
|
||||
const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
|
||||
unsigned DwordWidth = dstClass->getSize() / 4;
|
||||
assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned");
|
||||
newIndex = newIndex / DwordWidth;
|
||||
|
||||
unsigned newReg = dstClass->getRegister(newIndex);
|
||||
addLiveIn(MI, MF, MRI, TII, newReg);
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI DAG Lowering interface definition
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef SIISELLOWERING_H
|
||||
#define SIISELLOWERING_H
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "SIInstrInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class SITargetLowering : public AMDGPUTargetLowering
|
||||
{
|
||||
const SIInstrInfo * TII;
|
||||
|
||||
/// AppendS_WAITCNT - Memory reads and writes are syncronized using the
|
||||
/// S_WAITCNT instruction. This function takes the most conservative
|
||||
/// approach and inserts an S_WAITCNT instruction after every read and
|
||||
/// write.
|
||||
void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF,
|
||||
MachineRegisterInfo & MRI) const;
|
||||
public:
|
||||
SITargetLowering(TargetMachine &tm);
|
||||
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
|
||||
MachineBasicBlock * BB) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif //SIISELLOWERING_H
|
|
@ -1,128 +0,0 @@
|
|||
//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI Instruction format definitions.
|
||||
//
|
||||
// Instructions with _32 take 32-bit operands.
|
||||
// Instructions with _64 take 64-bit operands.
|
||||
//
|
||||
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
|
||||
// encoding is the standard encoding, but instruction that make use of
|
||||
// any of the instruction modifiers must use the 64-bit encoding.
|
||||
//
|
||||
// Instructions with _e32 use the 32-bit encoding.
|
||||
// Instructions with _e64 use the 64-bit encoding.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
|
||||
: VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
|
||||
|
||||
class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
|
||||
: VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
|
||||
|
||||
|
||||
class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
|
||||
: SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
|
||||
|
||||
class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
|
||||
: SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
|
||||
|
||||
class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
|
||||
|
||||
class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||
|
||||
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP1 <
|
||||
op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
|
||||
>;
|
||||
|
||||
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
|
||||
def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
|
||||
def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_64 <
|
||||
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP2 <
|
||||
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
|
||||
>;
|
||||
|
||||
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
|
||||
def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_64 <
|
||||
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
|
||||
: SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
|
||||
|
||||
class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
|
||||
: SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
|
||||
|
||||
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOPC <
|
||||
op, (outs), (ins arc:$src0, vrc:$src1), opName, pattern
|
||||
>;
|
||||
|
||||
multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_32 <
|
||||
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_64 <
|
||||
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOPC <op, (outs CCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
|
||||
|
||||
class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOPC <op, (outs CCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI Implementation of TargetInstrInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "SIInstrInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/MC/MCInstrDesc.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
|
||||
: AMDGPUInstrInfo(tm),
|
||||
RI(tm, *this)
|
||||
{ }
|
||||
|
||||
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
|
||||
{
|
||||
return RI;
|
||||
}
|
||||
|
||||
void
|
||||
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const
|
||||
{
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
}
|
||||
|
||||
unsigned SIInstrInfo::getEncodingType(const MachineInstr &MI) const
|
||||
{
|
||||
return get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
|
||||
}
|
||||
|
||||
unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
|
||||
{
|
||||
|
||||
/* Instructions with literal constants are expanded to 64-bits, and
|
||||
* the constant is stored in bits [63:32] */
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
|
||||
if (MI.getOperand(i).getType() == MachineOperand::MO_FPImmediate) {
|
||||
return 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* This instruction always has a literal */
|
||||
if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
|
||||
return 8;
|
||||
}
|
||||
|
||||
unsigned encoding_type = getEncodingType(MI);
|
||||
switch (encoding_type) {
|
||||
case SIInstrEncodingType::EXP:
|
||||
case SIInstrEncodingType::LDS:
|
||||
case SIInstrEncodingType::MUBUF:
|
||||
case SIInstrEncodingType::MTBUF:
|
||||
case SIInstrEncodingType::MIMG:
|
||||
case SIInstrEncodingType::VOP3:
|
||||
return 8;
|
||||
default:
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const
|
||||
{
|
||||
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
|
||||
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
|
||||
MachineInstrBuilder(MI).addImm(Imm);
|
||||
|
||||
return MI;
|
||||
|
||||
}
|
||||
|
||||
bool SIInstrInfo::isMov(unsigned Opcode) const
|
||||
{
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::S_MOV_B32:
|
||||
case AMDGPU::S_MOV_B64:
|
||||
case AMDGPU::V_MOV_B32_e32:
|
||||
case AMDGPU::V_MOV_B32_e64:
|
||||
case AMDGPU::V_MOV_IMM_F32:
|
||||
case AMDGPU::V_MOV_IMM_I32:
|
||||
case AMDGPU::S_MOV_IMM_I32:
|
||||
return true;
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue