forked from OSchip/llvm-project
parent
8c80019352
commit
d40f5ababf
|
@ -68,6 +68,8 @@ public:
|
|||
const char *Modifier = 0);
|
||||
void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
|
||||
const char *Modifier = 0);
|
||||
void printLocalOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
|
||||
const char *Modifier = 0);
|
||||
void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
|
||||
const char *Modifier = 0);
|
||||
void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
|
||||
|
@ -297,7 +299,7 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
|
|||
if (FrameInfo->getObjectSize(i) > 0) {
|
||||
std::string def = "\t.local .b";
|
||||
def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
|
||||
def += " __local_";
|
||||
def += " __local";
|
||||
def += utostr(i);
|
||||
def += ";";
|
||||
OutStreamer.EmitRawText(Twine(def));
|
||||
|
@ -458,6 +460,11 @@ void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
|
|||
OS << "__ret";
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum,
|
||||
raw_ostream &OS, const char *Modifier) {
|
||||
OS << "__local" << MI->getOperand(opNum).getImm();
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
|
||||
// Check to see if this is a special global used by LLVM, if so, emit it.
|
||||
if (EmitSpecialLLVMGlobal(gv))
|
||||
|
|
|
@ -147,6 +147,14 @@ def MEMri64 : Operand<i64> {
|
|||
let PrintMethod = "printMemOperand";
|
||||
let MIOperandInfo = (ops RegI64, i64imm);
|
||||
}
|
||||
def LOCALri32 : Operand<i32> {
|
||||
let PrintMethod = "printLocalOperand";
|
||||
let MIOperandInfo = (ops RegI32, i32imm);
|
||||
}
|
||||
def LOCALri64 : Operand<i64> {
|
||||
let PrintMethod = "printLocalOperand";
|
||||
let MIOperandInfo = (ops RegI64, i64imm);
|
||||
}
|
||||
def MEMii32 : Operand<i32> {
|
||||
let PrintMethod = "printMemOperand";
|
||||
let MIOperandInfo = (ops i32imm, i32imm);
|
||||
|
@ -602,6 +610,21 @@ multiclass PTX_LD<string opstr, string typestr,
|
|||
Requires<[Use64BitAddresses]>;
|
||||
}
|
||||
|
||||
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
|
||||
def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
|
||||
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
|
||||
[(set RC:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
|
||||
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
|
||||
[(set RC:$d, (load_local ADDRlocal64:$a))]>;
|
||||
def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
|
||||
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
|
||||
[(store_local RC:$d, ADDRlocal32:$a)]>;
|
||||
def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
|
||||
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
|
||||
[(store_local RC:$d, ADDRlocal64:$a)]>;
|
||||
}
|
||||
|
||||
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
|
||||
defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
|
||||
defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
|
||||
|
@ -960,86 +983,18 @@ let hasSideEffects = 1 in {
|
|||
[(pat_store RC:$d, ADDRri64:$a)]>,
|
||||
Requires<[Use64BitAddresses]>;
|
||||
*/
|
||||
let hasSideEffects = 1 in {
|
||||
def LDLOCALpiPred : InstPTX<(outs RegPred:$d), (ins MEMri32:$a),
|
||||
"ld.local.pred\t$d, [__local_$a]",
|
||||
[(set RegPred:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMri32:$a),
|
||||
"ld.local.u16\t$d, [__local_$a]",
|
||||
[(set RegI16:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMri32:$a),
|
||||
"ld.local.u32\t$d, [__local_$a]",
|
||||
[(set RegI32:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMri32:$a),
|
||||
"ld.local.u64\t$d, [__local_$a]",
|
||||
[(set RegI64:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMri32:$a),
|
||||
"ld.local.f32\t$d, [__local_$a]",
|
||||
[(set RegF32:$d, (load_local ADDRlocal32:$a))]>;
|
||||
def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMri32:$a),
|
||||
"ld.local.f64\t$d, [__local_$a]",
|
||||
[(set RegF64:$d, (load_local ADDRlocal32:$a))]>;
|
||||
|
||||
def STLOCALpiPred : InstPTX<(outs), (ins RegPred:$d, MEMri32:$a),
|
||||
"st.local.pred\t[__local_$a], $d",
|
||||
[(store_local RegPred:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiU16 : InstPTX<(outs), (ins RegI16:$d, MEMri32:$a),
|
||||
"st.local.u16\t[__local_$a], $d",
|
||||
[(store_local RegI16:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiU32 : InstPTX<(outs), (ins RegI32:$d, MEMri32:$a),
|
||||
"st.local.u32\t[__local_$a], $d",
|
||||
[(store_local RegI32:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiU64 : InstPTX<(outs), (ins RegI64:$d, MEMri32:$a),
|
||||
"st.local.u64\t[__local_$a], $d",
|
||||
[(store_local RegI64:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiF32 : InstPTX<(outs), (ins RegF32:$d, MEMri32:$a),
|
||||
"st.local.f32\t[__local_$a], $d",
|
||||
[(store_local RegF32:$d, ADDRlocal32:$a)]>;
|
||||
def STLOCALpiF64 : InstPTX<(outs), (ins RegF64:$d, MEMri32:$a),
|
||||
"st.local.f64\t[__local_$a], $d",
|
||||
[(store_local RegF64:$d, ADDRlocal32:$a)]>;
|
||||
|
||||
/*def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
|
||||
"ld.param.u16\t$d, [$a]",
|
||||
[(set RegI16:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
|
||||
"ld.param.u32\t$d, [$a]",
|
||||
[(set RegI32:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
|
||||
"ld.param.u64\t$d, [$a]",
|
||||
[(set RegI64:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
|
||||
"ld.param.f32\t$d, [$a]",
|
||||
[(set RegF32:$d, (PTXloadparam timm:$a))]>;
|
||||
def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
|
||||
"ld.param.f64\t$d, [$a]",
|
||||
[(set RegF64:$d, (PTXloadparam timm:$a))]>;
|
||||
|
||||
def STLOCALpiPred : InstPTX<(outs), (ins MEMpi:$d, RegPred:$a),
|
||||
"st.param.pred\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegPred:$a)]>;
|
||||
def STLOCALpiU16 : InstPTX<(outs), (ins MEMpi:$d, RegI16:$a),
|
||||
"st.param.u16\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI16:$a)]>;
|
||||
def STLOCALpiU32 : InstPTX<(outs), (ins MEMpi:$d, RegI32:$a),
|
||||
"st.param.u32\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI32:$a)]>;
|
||||
def STLOCALpiU64 : InstPTX<(outs), (ins MEMpi:$d, RegI64:$a),
|
||||
"st.param.u64\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegI64:$a)]>;
|
||||
def STLOCALpiF32 : InstPTX<(outs), (ins MEMpi:$d, RegF32:$a),
|
||||
"st.param.f32\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegF32:$a)]>;
|
||||
def STLOCALpiF64 : InstPTX<(outs), (ins MEMpi:$d, RegF64:$a),
|
||||
"st.param.f64\t[$d], $a",
|
||||
[(PTXstoreparam timm:$d, RegF64:$a)]>;*/
|
||||
}
|
||||
|
||||
// Stores
|
||||
defm STg : PTX_ST_ALL<"st.global", store_global>;
|
||||
//defm STl : PTX_ST_ALL<"st.local", store_local>;
|
||||
defm STs : PTX_ST_ALL<"st.shared", store_shared>;
|
||||
|
||||
defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
|
||||
defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
|
||||
defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
|
||||
defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
|
||||
defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
|
||||
defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
|
||||
|
||||
|
||||
// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
|
||||
|
|
|
@ -65,5 +65,5 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
|
||||
// This frame index is post stack slot re-use assignments
|
||||
//MI.getOperand(Index).ChangeToRegister(Reg, false);
|
||||
MI.getOperand(Index).ChangeToImmediate(0);
|
||||
MI.getOperand(Index).ChangeToImmediate(FrameIndex);
|
||||
}
|
||||
|
|
|
@ -6,9 +6,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_i16[20];
|
||||
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i16[20];
|
||||
@array_local_i16 = external addrspace(2) global [10 x i16]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i16[20];
|
||||
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||
|
||||
|
@ -18,9 +15,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_i32[40];
|
||||
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i32[40];
|
||||
@array_local_i32 = external addrspace(2) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i32[40];
|
||||
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||
|
||||
|
@ -30,9 +24,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_i64[80];
|
||||
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i64[80];
|
||||
@array_local_i64 = external addrspace(2) global [10 x i64]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i64[80];
|
||||
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||
|
||||
|
@ -42,9 +33,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_float[40];
|
||||
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_float[40];
|
||||
@array_local_float = external addrspace(2) global [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_float[40];
|
||||
@array_shared_float = external addrspace(4) global [10 x float]
|
||||
|
||||
|
@ -54,9 +42,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_double[80];
|
||||
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_double[80];
|
||||
@array_local_double = external addrspace(2) global [10 x double]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_double[80];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
|
@ -296,56 +281,6 @@ entry:
|
|||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_local_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i16;
|
||||
;CHECK: ld.local.u16 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(2)* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_local_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i32;
|
||||
;CHECK: ld.local.u32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(2)* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t4_local_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i64;
|
||||
;CHECK: ld.local.u64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(2)* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_local_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_float;
|
||||
;CHECK: ld.local.f32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||
%x = load float addrspace(2)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_local_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_double;
|
||||
;CHECK: ld.local.f64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||
%x = load double addrspace(2)* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_shared_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
|
||||
|
|
|
@ -6,9 +6,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_i16[20];
|
||||
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i16[20];
|
||||
@array_local_i16 = external addrspace(2) global [10 x i16]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i16[20];
|
||||
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||
|
||||
|
@ -18,9 +15,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_i32[40];
|
||||
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i32[40];
|
||||
@array_local_i32 = external addrspace(2) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i32[40];
|
||||
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||
|
||||
|
@ -30,9 +24,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_i64[80];
|
||||
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_i64[80];
|
||||
@array_local_i64 = external addrspace(2) global [10 x i64]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_i64[80];
|
||||
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||
|
||||
|
@ -42,9 +33,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_float[40];
|
||||
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_float[40];
|
||||
@array_local_float = external addrspace(2) global [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_float[40];
|
||||
@array_shared_float = external addrspace(4) global [10 x float]
|
||||
|
||||
|
@ -54,9 +42,6 @@
|
|||
;CHECK: .extern .const .b8 array_constant_double[80];
|
||||
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||
|
||||
;CHECK: .extern .local .b8 array_local_double[80];
|
||||
@array_local_double = external addrspace(2) global [10 x double]
|
||||
|
||||
;CHECK: .extern .shared .b8 array_shared_double[80];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
|
@ -251,56 +236,6 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i16;
|
||||
;CHECK: st.local.u16 [%r[[R0]]], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||
store i16 %x, i16 addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i32;
|
||||
;CHECK: st.local.u32 [%r[[R0]]], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i64;
|
||||
;CHECK: st.local.u64 [%r[[R0]]], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||
store i64 %x, i64 addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_float;
|
||||
;CHECK: st.local.f32 [%r[[R0]]], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||
store float %x, float addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_double;
|
||||
;CHECK: st.local.f64 [%r[[R0]]], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||
store double %x, double addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
|
||||
|
|
Loading…
Reference in New Issue