PTX: Implement PTXSelectionDAGInfo

llvm-svn: 140549
This commit is contained in:
Justin Holewinski 2011-09-26 18:57:27 +00:00
parent c3edaddfea
commit b40da7f956
5 changed files with 214 additions and 5 deletions

View File

@ -18,6 +18,7 @@ add_llvm_target(PTXCodeGen
PTXParamManager.cpp
PTXRegAlloc.cpp
PTXRegisterInfo.cpp
PTXSelectionDAGInfo.cpp
PTXSubtarget.cpp
PTXTargetMachine.cpp
)

View File

@ -0,0 +1,148 @@
//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTXSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-selectiondag-info"
#include "PTXTargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
/// Construct the PTX SelectionDAG info object for target machine \p TM and
/// cache a pointer to the PTXSubtarget obtained from it.
PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
  : TargetSelectionDAGInfo(TM),
    Subtarget(&TM.getSubtarget<PTXSubtarget>())
{}
/// Destructor. The body is empty: this class performs no cleanup of its own.
PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {}
/// getAddrWithOffset - Return the address Base + Offset. The addition and the
/// offset constant use the value type of Base, so the arithmetic always
/// matches the width of the pointer being offset.
static SDValue getAddrWithOffset(SelectionDAG &DAG, DebugLoc dl, SDValue Base,
                                 uint64_t Offset) {
  EVT PtrVT = Base.getValueType();
  return DAG.getNode(ISD::ADD, dl, PtrVT, Base,
                     DAG.getConstant(Offset, PtrVT));
}

/// EmitTargetCodeForMemcpy - Expand a memcpy with a constant size and an
/// alignment that is a multiple of 4 into explicit loads and stores.
///
/// The bulk of the copy is done with 4-byte (i32) accesses, emitted in batches
/// of loads followed by the matching stores, with TokenFactor nodes ordering
/// each batch. Any remaining 1-3 bytes are copied with an i16 and/or an i8
/// access.
///
/// \param Chain        incoming chain for the memory operations.
/// \param Dst, Src     destination and source addresses.
/// \param Size         number of bytes to copy; must be a ConstantSDNode.
/// \param Align        alignment of the copy; must be a multiple of 4.
/// \param isVolatile   volatility applied to every emitted load and store.
/// \param AlwaysInline currently ignored (see comment in the body).
/// \param DstPtrInfo, SrcPtrInfo  pointer info, offset per access.
/// \returns the chain after the last store, or an empty SDValue when the
///          alignment or size requirements above are not met.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
                                             bool isVolatile, bool AlwaysInline,
                                             MachinePointerInfo DstPtrInfo,
                                             MachinePointerInfo SrcPtrInfo) const {
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();

  // This requires the copy size to be a constant.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();

  // Always inline memcpys. In PTX, we do not have a C library that provides
  // a memcpy function, so AlwaysInline is deliberately not consulted.
  // NOTE(review): there is no upper bound on SizeVal here, so a very large
  // constant copy is fully unrolled.

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;

  // Maximum number of loads (and then stores) emitted per batch. The tail
  // handling below reuses the same arrays and needs at most two entries.
  const unsigned MaxOpsPerBatch = 6;
  SDValue TFOps[MaxOpsPerBatch];
  SDValue Loads[MaxOpsPerBatch];
  uint64_t SrcOff = 0, DstOff = 0;

  // Emit up to MaxOpsPerBatch loads, then a TokenFactor barrier, then the
  // same number of stores, and repeat until all whole words are copied.
  while (EmittedNumMemOps < NumMemOps) {
    for (i = 0;
         i < MaxOpsPerBatch && EmittedNumMemOps + i < NumMemOps; ++i) {
      Loads[i] = DAG.getLoad(VT, dl, Chain,
                             getAddrWithOffset(DAG, dl, Src, SrcOff),
                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                             false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    for (i = 0;
         i < MaxOpsPerBatch && EmittedNumMemOps + i < NumMemOps; ++i) {
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              getAddrWithOffset(DAG, dl, Dst, DstOff),
                              DstPtrInfo.getWithOffset(DstOff),
                              isVolatile, false, 0);
      DstOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    EmittedNumMemOps += i;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes: an i16 access while
  // at least two bytes remain, then an i8 access for a final odd byte. The
  // tail honours isVolatile exactly like the word-sized accesses above.
  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           getAddrWithOffset(DAG, dl, Src, SrcOff),
                           SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                           false, 0);
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

  // Replay the same size sequence for the stores so that store i writes the
  // value produced by load i.
  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            getAddrWithOffset(DAG, dl, Dst, DstOff),
                            DstPtrInfo.getWithOffset(DstOff), isVolatile,
                            false, 0);
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
}
/// EmitTargetCodeForMemset - Target hook for custom memset lowering. PTX has
/// no implementation yet, so reaching this function is reported through
/// llvm_unreachable; none of the arguments are inspected.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain, SDValue Dst,
                                             SDValue Src, SDValue Size,
                                             unsigned Align, bool isVolatile,
                                             MachinePointerInfo DstPtrInfo) const
{
  llvm_unreachable("memset lowering not implemented for PTX yet");
}

View File

@ -0,0 +1,53 @@
//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX subclass for TargetSelectionDAGInfo.
//
//===----------------------------------------------------------------------===//
#ifndef PTXSELECTIONDAGINFO_H
#define PTXSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {

// Forward declaration: only a pointer to the subtarget is stored here, so the
// full definition is not needed and the header does not depend on the
// includer having pulled in PTXSubtarget.h first.
class PTXSubtarget;

/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
  /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const PTXSubtarget *Subtarget;

public:
  /// Build the info object for the given target machine.
  explicit PTXSelectionDAGInfo(const TargetMachine &TM);
  ~PTXSelectionDAGInfo();

  /// EmitTargetCodeForMemcpy - Target hook for custom memcpy lowering.
  /// \p Dst and \p Src are the destination and source addresses, \p Size the
  /// byte count and \p Align the alignment of the copy. Returns the resulting
  /// chain, or an empty SDValue if no target-specific code was emitted.
  virtual
  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Chain,
                                  SDValue Dst, SDValue Src,
                                  SDValue Size, unsigned Align,
                                  bool isVolatile, bool AlwaysInline,
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo) const;

  /// EmitTargetCodeForMemset - Target hook for custom memset lowering.
  /// Parameter names match the out-of-line definition: \p Dst is the
  /// destination address, \p Src the value operand and \p Size the byte count.
  virtual
  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Chain,
                                  SDValue Dst, SDValue Src,
                                  SDValue Size, unsigned Align,
                                  bool isVolatile,
                                  MachinePointerInfo DstPtrInfo) const;
};

} // end namespace llvm
#endif

View File

@ -94,6 +94,7 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this) {
}

View File

@ -17,6 +17,7 @@
#include "PTXISelLowering.h"
#include "PTXInstrInfo.h"
#include "PTXFrameLowering.h"
#include "PTXSelectionDAGInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@ -25,11 +26,12 @@
namespace llvm {
class PTXTargetMachine : public LLVMTargetMachine {
private:
const TargetData DataLayout;
PTXSubtarget Subtarget; // has to be initialized before FrameLowering
PTXFrameLowering FrameLowering;
PTXInstrInfo InstrInfo;
PTXTargetLowering TLInfo;
const TargetData DataLayout;
PTXSubtarget Subtarget; // has to be initialized before FrameLowering
PTXFrameLowering FrameLowering;
PTXInstrInfo InstrInfo;
PTXSelectionDAGInfo TSInfo;
PTXTargetLowering TLInfo;
public:
PTXTargetMachine(const Target &T, StringRef TT,
@ -50,6 +52,10 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXTargetLowering *getTargetLowering() const {
return &TLInfo; }
virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual bool addInstSelector(PassManagerBase &PM,