forked from OSchip/llvm-project
PTX: Fix detection of stack load/store vs. global load/store, and fix the printing of local offsets.
llvm-svn: 140547
This commit is contained in:
parent
ed0f9168de
commit
1395cf8423
|
@ -299,10 +299,12 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
|
||||||
if (FrameInfo->getObjectSize(i) > 0) {
|
if (FrameInfo->getObjectSize(i) > 0) {
|
||||||
std::string def = "\t.local .align ";
|
std::string def = "\t.local .align ";
|
||||||
def += utostr(FrameInfo->getObjectAlignment(i));
|
def += utostr(FrameInfo->getObjectAlignment(i));
|
||||||
def += " .b";
|
def += " .b8";
|
||||||
def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
|
|
||||||
def += " __local";
|
def += " __local";
|
||||||
def += utostr(i);
|
def += utostr(i);
|
||||||
|
def += "[";
|
||||||
|
def += utostr(FrameInfo->getObjectSize(i)); // Array length in bytes (elements are .b8)
|
||||||
|
def += "]";
|
||||||
def += ";";
|
def += ";";
|
||||||
OutStreamer.EmitRawText(Twine(def));
|
OutStreamer.EmitRawText(Twine(def));
|
||||||
}
|
}
|
||||||
|
@ -465,6 +467,11 @@ void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
|
||||||
void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum,
|
void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum,
|
||||||
raw_ostream &OS, const char *Modifier) {
|
raw_ostream &OS, const char *Modifier) {
|
||||||
OS << "__local" << MI->getOperand(opNum).getImm();
|
OS << "__local" << MI->getOperand(opNum).getImm();
|
||||||
|
|
||||||
|
if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() != 0){
|
||||||
|
OS << "+";
|
||||||
|
printOperand(MI, opNum+1, OS);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
|
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
|
||||||
|
|
|
@ -213,14 +213,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
|
||||||
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
|
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
|
||||||
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
|
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
|
||||||
SDValue &Offset) {
|
SDValue &Offset) {
|
||||||
if (Addr.getOpcode() != ISD::ADD) {
|
// FrameIndex addresses are handled separately
|
||||||
|
//errs() << "SelectADDRri: ";
|
||||||
|
//Addr.getNode()->dumpr();
|
||||||
|
if (isa<FrameIndexSDNode>(Addr)) {
|
||||||
|
//errs() << "Failure\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||||
|
Base = Addr.getOperand(0);
|
||||||
|
if (isa<FrameIndexSDNode>(Base)) {
|
||||||
|
//errs() << "Failure\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
|
||||||
|
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
|
||||||
|
//errs() << "Success\n";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*if (Addr.getNumOperands() == 1) {
|
||||||
|
Base = Addr;
|
||||||
|
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||||
|
errs() << "Success\n";
|
||||||
|
return true;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
//errs() << "SelectADDRri fails on: ";
|
||||||
|
//Addr.getNode()->dumpr();
|
||||||
|
|
||||||
|
if (isImm(Addr)) {
|
||||||
|
//errs() << "Failure\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Base = Addr;
|
||||||
|
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||||
|
|
||||||
|
//errs() << "Success\n";
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*if (Addr.getOpcode() != ISD::ADD) {
|
||||||
// let SelectADDRii handle the [imm] case
|
// let SelectADDRii handle the [imm] case
|
||||||
if (isImm(Addr))
|
if (isImm(Addr))
|
||||||
return false;
|
return false;
|
||||||
// it is [reg]
|
// it is [reg]
|
||||||
|
|
||||||
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
||||||
|
|
||||||
Base = Addr;
|
Base = Addr;
|
||||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||||
|
|
||||||
|
@ -242,7 +282,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
|
||||||
}
|
}
|
||||||
|
|
||||||
// neither [reg+imm] nor [imm+reg]
|
// neither [reg+imm] nor [imm+reg]
|
||||||
return false;
|
return false;*/
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match memory operand of the form [imm+imm] and [imm]
|
// Match memory operand of the form [imm+imm] and [imm]
|
||||||
|
@ -269,35 +309,30 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
|
||||||
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
|
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
|
||||||
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
|
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
|
||||||
SDValue &Offset) {
|
SDValue &Offset) {
|
||||||
if (Addr.getOpcode() != ISD::ADD) {
|
//errs() << "SelectADDRlocal: ";
|
||||||
// let SelectADDRii handle the [imm] case
|
//Addr.getNode()->dumpr();
|
||||||
if (isImm(Addr))
|
if (isa<FrameIndexSDNode>(Addr)) {
|
||||||
return false;
|
|
||||||
// it is [reg]
|
|
||||||
|
|
||||||
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
|
||||||
|
|
||||||
Base = Addr;
|
Base = Addr;
|
||||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||||
|
//errs() << "Success\n";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Addr.getNumOperands() < 2)
|
if (CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||||
|
Base = Addr.getOperand(0);
|
||||||
|
if (!isa<FrameIndexSDNode>(Base)) {
|
||||||
|
//errs() << "Failure\n";
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
// let SelectADDRii handle the [imm+imm] case
|
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
|
||||||
if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
|
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
|
||||||
return false;
|
//errs() << "Offset: ";
|
||||||
|
//Offset.getNode()->dumpr();
|
||||||
// try [reg+imm] and [imm+reg]
|
//errs() << "Success\n";
|
||||||
for (int i = 0; i < 2; i ++)
|
|
||||||
if (SelectImm(Addr.getOperand(1-i), Offset)) {
|
|
||||||
Base = Addr.getOperand(i);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// neither [reg+imm] nor [imm+reg]
|
//errs() << "Failure\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,9 +24,7 @@ def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
|
||||||
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
const Value *Src;
|
const Value *Src;
|
||||||
const PointerType *PT;
|
const PointerType *PT;
|
||||||
const SDValue &MemOp = N->getOperand(1);
|
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
|
||||||
if ((MemOp.getOpcode() != ISD::FrameIndex) &&
|
|
||||||
(Src = cast<LoadSDNode>(N)->getSrcValue()) &&
|
|
||||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||||
return PT->getAddressSpace() == PTX::GLOBAL;
|
return PT->getAddressSpace() == PTX::GLOBAL;
|
||||||
return false;
|
return false;
|
||||||
|
@ -41,11 +39,6 @@ def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
return false;
|
return false;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
|
||||||
const SDValue &MemOp = N->getOperand(1);
|
|
||||||
return MemOp.getOpcode() == ISD::FrameIndex;
|
|
||||||
}]>;
|
|
||||||
|
|
||||||
def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
const Value *Src;
|
const Value *Src;
|
||||||
const PointerType *PT;
|
const PointerType *PT;
|
||||||
|
@ -59,20 +52,12 @@ def store_global
|
||||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||||
const Value *Src;
|
const Value *Src;
|
||||||
const PointerType *PT;
|
const PointerType *PT;
|
||||||
const SDValue &MemOp = N->getOperand(2);
|
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||||
if ((MemOp.getOpcode() != ISD::FrameIndex) &&
|
|
||||||
(Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
|
||||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||||
return PT->getAddressSpace() == PTX::GLOBAL;
|
return PT->getAddressSpace() == PTX::GLOBAL;
|
||||||
return false;
|
return false;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def store_local
|
|
||||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
|
||||||
const SDValue &MemOp = N->getOperand(2);
|
|
||||||
return MemOp.getOpcode() == ISD::FrameIndex;
|
|
||||||
}]>;
|
|
||||||
|
|
||||||
def store_shared
|
def store_shared
|
||||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||||
const Value *Src;
|
const Value *Src;
|
||||||
|
@ -221,16 +206,16 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
|
||||||
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
|
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
|
||||||
def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
|
def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
|
||||||
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
|
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
|
||||||
[(set RC:$d, (load_local ADDRlocal32:$a))]>;
|
[(set RC:$d, (load_global ADDRlocal32:$a))]>;
|
||||||
def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
|
def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
|
||||||
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
|
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
|
||||||
[(set RC:$d, (load_local ADDRlocal64:$a))]>;
|
[(set RC:$d, (load_global ADDRlocal64:$a))]>;
|
||||||
def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
|
def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
|
||||||
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
|
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
|
||||||
[(store_local RC:$d, ADDRlocal32:$a)]>;
|
[(store_global RC:$d, ADDRlocal32:$a)]>;
|
||||||
def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
|
def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
|
||||||
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
|
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
|
||||||
[(store_local RC:$d, ADDRlocal64:$a)]>;
|
[(store_global RC:$d, ADDRlocal64:$a)]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
|
multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
||||||
|
|
||||||
define ptx_device float @stack1(float %a) {
|
define ptx_device float @stack1(float %a) {
|
||||||
; CHECK: .local .align 4 .b32 __local0;
|
; CHECK: .local .align 4 .b8 __local0[4];
|
||||||
%a.2 = alloca float, align 4
|
%a.2 = alloca float, align 4
|
||||||
; CHECK: st.local.f32 [__local0], %f0
|
; CHECK: st.local.f32 [__local0], %f0
|
||||||
store float %a, float* %a.2
|
store float %a, float* %a.2
|
||||||
|
@ -10,7 +10,7 @@ define ptx_device float @stack1(float %a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @stack1_align8(float %a) {
|
define ptx_device float @stack1_align8(float %a) {
|
||||||
; CHECK: .local .align 8 .b32 __local0;
|
; CHECK: .local .align 8 .b8 __local0[4];
|
||||||
%a.2 = alloca float, align 8
|
%a.2 = alloca float, align 8
|
||||||
; CHECK: st.local.f32 [__local0], %f0
|
; CHECK: st.local.f32 [__local0], %f0
|
||||||
store float %a, float* %a.2
|
store float %a, float* %a.2
|
||||||
|
|
Loading…
Reference in New Issue