forked from OSchip/llvm-project
R600: Add support for i8 and i16 local memory stores
llvm-svn: 189223
This commit is contained in:
parent
2ffc330673
commit
f3d166aa1e
|
@ -495,9 +495,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
|
|||
Store->getBasePtr(),
|
||||
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
|
||||
PtrVT));
|
||||
Chains.push_back(DAG.getStore(Store->getChain(), SL, Val, Ptr,
|
||||
Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
|
||||
MachinePointerInfo(Store->getMemOperand()->getValue()),
|
||||
Store->isVolatile(), Store->isNonTemporal(),
|
||||
MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
|
||||
Store->getAlignment()));
|
||||
}
|
||||
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
|
||||
|
|
|
@ -73,6 +73,13 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
|
|||
SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
// MSKOR instructions are atomic memory instructions used mainly for storing
|
||||
// 8-bit and 16-bit values. The definition is:
|
||||
//
|
||||
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
|
||||
//
|
||||
// src0: vec4(src, 0, 0, mask)
|
||||
// src1: dst - rat offset (aka pointer) in dwords
|
||||
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
|
||||
SDTypeProfile<0, 2, []>,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
||||
|
|
|
@ -156,13 +156,23 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
|
|||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def local_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei8 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei16 node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
|
||||
|
|
|
@ -45,7 +45,8 @@ namespace R600_InstFlag {
|
|||
ALU_INST = (1 << 14),
|
||||
LDS_1A = (1 << 15),
|
||||
LDS_1A1D = (1 << 16),
|
||||
IS_EXPORT = (1 << 17)
|
||||
IS_EXPORT = (1 << 17),
|
||||
LDS_1A2D = (1 << 18)
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
|
|||
bit TEXInst = 0;
|
||||
bit ALUInst = 0;
|
||||
bit IsExport = 0;
|
||||
bit LDS_1A2D = 0;
|
||||
|
||||
let Namespace = "AMDGPU";
|
||||
let OutOperandList = outs;
|
||||
|
@ -55,6 +56,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
|
|||
let TSFlags{15} = LDS_1A;
|
||||
let TSFlags{16} = LDS_1A1D;
|
||||
let TSFlags{17} = IsExport;
|
||||
let TSFlags{18} = LDS_1A2D;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -149,7 +149,8 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
|
|||
unsigned TargetFlags = get(Opcode).TSFlags;
|
||||
|
||||
return ((TargetFlags & R600_InstFlag::LDS_1A) |
|
||||
(TargetFlags & R600_InstFlag::LDS_1A1D));
|
||||
(TargetFlags & R600_InstFlag::LDS_1A1D) |
|
||||
(TargetFlags & R600_InstFlag::LDS_1A2D));
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
|
||||
|
|
|
@ -1657,13 +1657,31 @@ class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> :
|
|||
let LDS_1A1D = 1;
|
||||
}
|
||||
|
||||
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
|
||||
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
|
||||
>;
|
||||
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
|
||||
R600_LDS <
|
||||
lds_op,
|
||||
(outs),
|
||||
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
|
||||
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
|
||||
R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
|
||||
LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
|
||||
" "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
|
||||
pattern> {
|
||||
let LDS_1A2D = 1;
|
||||
}
|
||||
|
||||
def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
|
||||
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
|
||||
>;
|
||||
def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE",
|
||||
[(truncstorei8_local i32:$src1, i32:$src0)]
|
||||
>;
|
||||
def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE",
|
||||
[(truncstorei16_local i32:$src1, i32:$src0)]
|
||||
>;
|
||||
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
|
||||
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
|
||||
>;
|
||||
|
||||
// TRUNC is used for the FLT_TO_INT instructions to work around a
|
||||
// perceived problem where the rounding modes are applied differently
|
||||
|
|
|
@ -392,6 +392,8 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
|
|||
} // End isCompare = 1
|
||||
|
||||
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
|
||||
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
|
||||
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
|
||||
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
|
||||
|
||||
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
|
||||
|
@ -1750,11 +1752,15 @@ def : Pat <
|
|||
(i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(local_store i32:$src1, i32:$src0),
|
||||
(DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0)
|
||||
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
|
||||
(frag i32:$src1, i32:$src0),
|
||||
(inst 0, $src0, $src1, $src1, 0, 0)
|
||||
>;
|
||||
|
||||
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
|
||||
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
|
||||
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
|
||||
|
||||
/********** ================== **********/
|
||||
/********** SMRD Patterns **********/
|
||||
/********** ================== **********/
|
||||
|
|
|
@ -172,6 +172,24 @@ entry:
|
|||
; Local Address Space
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
; EG-CHECK: @store_local_i8
|
||||
; EG-CHECK: LDS_BYTE_WRITE
|
||||
; SI-CHECK: @store_local_i8
|
||||
; SI-CHECK: DS_WRITE_B8
|
||||
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
|
||||
store i8 %in, i8 addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK: @store_local_i16
|
||||
; EG-CHECK: LDS_SHORT_WRITE
|
||||
; SI-CHECK: @store_local_i16
|
||||
; SI-CHECK: DS_WRITE_B16
|
||||
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
|
||||
store i16 %in, i16 addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK: @store_local_v2i16
|
||||
; EG-CHECK: LDS_WRITE
|
||||
; CM-CHECK: @store_local_v2i16
|
||||
|
|
Loading…
Reference in New Issue