forked from OSchip/llvm-project
parent
c3edaddfea
commit
b40da7f956
|
@ -18,6 +18,7 @@ add_llvm_target(PTXCodeGen
|
|||
PTXParamManager.cpp
|
||||
PTXRegAlloc.cpp
|
||||
PTXRegisterInfo.cpp
|
||||
PTXSelectionDAGInfo.cpp
|
||||
PTXSubtarget.cpp
|
||||
PTXTargetMachine.cpp
|
||||
)
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the PTXSelectionDAGInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-selectiondag-info"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
using namespace llvm;
|
||||
|
||||
/// Build the PTX SelectionDAG info object for \p TM, remembering the address
/// of the PTXSubtarget that \p TM reports.
PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
  : TargetSelectionDAGInfo(TM), Subtarget(&TM.getSubtarget<PTXSubtarget>()) {}
|
||||
|
||||
/// Nothing to release: the class only stores a non-owning subtarget pointer.
PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {}
|
||||
|
||||
/// EmitTargetCodeForMemcpy - Expand a constant-size, 4-byte-aligned memcpy
/// into an inline sequence of loads and stores.
///
/// Whole words are copied in batches of up to MAX_LOADS_PER_BATCH i32 loads
/// followed by the matching i32 stores, with a TokenFactor after each group.
/// The remaining 1-3 bytes are copied with at most one i16 and one i8 access.
///
/// \param Chain         Incoming chain that the emitted accesses hang off.
/// \param Dst, Src      Destination and source addresses.
/// \param Size          Byte count; only a ConstantSDNode is handled.
/// \param Align         Alignment guaranteed for the pointers.
/// \param isVolatile    Forwarded to every load and store that is emitted.
/// \param AlwaysInline  Not consulted (see the comment in the body).
/// \param DstPtrInfo, SrcPtrInfo  Pointer info, re-based for each access.
/// \returns the chain produced by the last group of stores (or \p Chain for a
/// zero-byte copy), or a null SDValue when the alignment or constant-size
/// requirement is not met.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
                                             bool isVolatile, bool AlwaysInline,
                                             MachinePointerInfo DstPtrInfo,
                                             MachinePointerInfo SrcPtrInfo) const {
  // The copy is built from 4-byte accesses, so both pointers must be known to
  // be 4-byte aligned.  An alignment of 0 carries no guarantee at all, so it
  // is rejected together with every other non-multiple of 4.
  if (Align == 0 || (Align & 3) != 0)
    return SDValue();

  // Only copies whose size is a compile-time constant can be unrolled.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  const uint64_t SizeVal = ConstantSize->getZExtValue();

  // AlwaysInline is intentionally not consulted: in PTX, we do not have a C
  // library that provides a memcpy function, so the copy is always inlined.

  // Do the address arithmetic in the type of the pointers themselves rather
  // than a hard-coded i32, so that 64-bit pointers are handled as well.
  const EVT SrcPtrVT = Src.getValueType();
  const EVT DstPtrVT = Dst.getValueType();

  const EVT WordVT = MVT::i32;
  const unsigned WordSize = 4;
  const unsigned MAX_LOADS_PER_BATCH = 6;
  SDValue TFOps[MAX_LOADS_PER_BATCH];
  SDValue Loads[MAX_LOADS_PER_BATCH];
  uint64_t SrcOff = 0, DstOff = 0;

  // Copy the whole words: up to MAX_LOADS_PER_BATCH loads, a TokenFactor
  // barrier, then the same number of stores and another TokenFactor.
  for (uint64_t WordsLeft = SizeVal >> 2; WordsLeft != 0; ) {
    const unsigned BatchSize = WordsLeft < MAX_LOADS_PER_BATCH
                                 ? static_cast<unsigned>(WordsLeft)
                                 : MAX_LOADS_PER_BATCH;

    for (unsigned i = 0; i != BatchSize; ++i) {
      Loads[i] = DAG.getLoad(WordVT, dl, Chain,
                             DAG.getNode(ISD::ADD, dl, SrcPtrVT, Src,
                                         DAG.getConstant(SrcOff, SrcPtrVT)),
                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                             false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += WordSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0],
                        BatchSize);

    for (unsigned i = 0; i != BatchSize; ++i) {
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              DAG.getNode(ISD::ADD, dl, DstPtrVT, Dst,
                                          DAG.getConstant(DstOff, DstPtrVT)),
                              DstPtrInfo.getWithOffset(DstOff),
                              isVolatile, false, 0);
      DstOff += WordSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0],
                        BatchSize);

    WordsLeft -= BatchSize;
  }

  const unsigned BytesLeft = static_cast<unsigned>(SizeVal & 3);
  if (BytesLeft == 0)
    return Chain;

  // Split the trailing (1 - 3) bytes into at most one i16 access followed by
  // at most one i8 access.  The plan is computed once and shared by the loads
  // and the stores so the two can never disagree.
  EVT TailVTs[2];
  unsigned TailSizes[2];
  unsigned NumTailOps = 0;
  if (BytesLeft & 2) {
    TailVTs[NumTailOps] = MVT::i16;
    TailSizes[NumTailOps] = 2;
    ++NumTailOps;
  }
  if (BytesLeft & 1) {
    TailVTs[NumTailOps] = MVT::i8;
    TailSizes[NumTailOps] = 1;
    ++NumTailOps;
  }

  // The tail accesses carry the same volatility as the word accesses.
  for (unsigned i = 0; i != NumTailOps; ++i) {
    Loads[i] = DAG.getLoad(TailVTs[i], dl, Chain,
                           DAG.getNode(ISD::ADD, dl, SrcPtrVT, Src,
                                       DAG.getConstant(SrcOff, SrcPtrVT)),
                           SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                           false, 0);
    TFOps[i] = Loads[i].getValue(1);
    SrcOff += TailSizes[i];
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], NumTailOps);

  for (unsigned i = 0; i != NumTailOps; ++i) {
    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, DstPtrVT, Dst,
                                        DAG.getConstant(DstOff, DstPtrVT)),
                            DstPtrInfo.getWithOffset(DstOff),
                            isVolatile, false, 0);
    DstOff += TailSizes[i];
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], NumTailOps);
}
|
||||
|
||||
/// EmitTargetCodeForMemset - Target hook for memset lowering.  No PTX-specific
/// expansion exists yet, so reaching this function is reported as a fatal
/// "unreachable" condition and none of the arguments are inspected.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain, SDValue Dst,
                                             SDValue Src, SDValue Size,
                                             unsigned Align, bool isVolatile,
                                             MachinePointerInfo DstPtrInfo) const {
  llvm_unreachable("memset lowering not implemented for PTX yet");
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the PTX subclass for TargetSelectionDAGInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTXSELECTIONDAGINFO_H
|
||||
#define PTXSELECTIONDAGINFO_H
|
||||
|
||||
#include "llvm/Target/TargetSelectionDAGInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
|
||||
/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
|
||||
class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
|
||||
/// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
const PTXSubtarget *Subtarget;
|
||||
|
||||
public:
|
||||
explicit PTXSelectionDAGInfo(const TargetMachine &TM);
|
||||
~PTXSelectionDAGInfo();
|
||||
|
||||
virtual
|
||||
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
|
||||
SDValue Chain,
|
||||
SDValue Dst, SDValue Src,
|
||||
SDValue Size, unsigned Align,
|
||||
bool isVolatile, bool AlwaysInline,
|
||||
MachinePointerInfo DstPtrInfo,
|
||||
MachinePointerInfo SrcPtrInfo) const;
|
||||
|
||||
virtual
|
||||
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
|
||||
SDValue Chain,
|
||||
SDValue Op1, SDValue Op2,
|
||||
SDValue Op3, unsigned Align,
|
||||
bool isVolatile,
|
||||
MachinePointerInfo DstPtrInfo) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -94,6 +94,7 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
|
|||
Subtarget(TT, CPU, FS, is64Bit),
|
||||
FrameLowering(Subtarget),
|
||||
InstrInfo(*this),
|
||||
TSInfo(*this),
|
||||
TLInfo(*this) {
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "PTXISelLowering.h"
|
||||
#include "PTXInstrInfo.h"
|
||||
#include "PTXFrameLowering.h"
|
||||
#include "PTXSelectionDAGInfo.h"
|
||||
#include "PTXSubtarget.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
@ -25,11 +26,12 @@
|
|||
namespace llvm {
|
||||
class PTXTargetMachine : public LLVMTargetMachine {
|
||||
private:
|
||||
const TargetData DataLayout;
|
||||
PTXSubtarget Subtarget; // has to be initialized before FrameLowering
|
||||
PTXFrameLowering FrameLowering;
|
||||
PTXInstrInfo InstrInfo;
|
||||
PTXTargetLowering TLInfo;
|
||||
const TargetData DataLayout;
|
||||
PTXSubtarget Subtarget; // has to be initialized before FrameLowering
|
||||
PTXFrameLowering FrameLowering;
|
||||
PTXInstrInfo InstrInfo;
|
||||
PTXSelectionDAGInfo TSInfo;
|
||||
PTXTargetLowering TLInfo;
|
||||
|
||||
public:
|
||||
PTXTargetMachine(const Target &T, StringRef TT,
|
||||
|
@ -50,6 +52,10 @@ class PTXTargetMachine : public LLVMTargetMachine {
|
|||
/// Accessor for the PTX target lowering member (TLInfo).
virtual const PTXTargetLowering *getTargetLowering() const {
  return &TLInfo;
}
|
||||
|
||||
/// Accessor for the PTX SelectionDAG info member (TSInfo).
virtual const PTXSelectionDAGInfo *getSelectionDAGInfo() const {
  return &TSInfo;
}
|
||||
|
||||
/// Accessor for the PTX subtarget member (Subtarget).
virtual const PTXSubtarget *getSubtargetImpl() const {
  return &Subtarget;
}
|
||||
|
||||
virtual bool addInstSelector(PassManagerBase &PM,
|
||||
|
|
Loading…
Reference in New Issue