forked from OSchip/llvm-project
AMDGPU: Use assert zext for workgroup sizes
llvm-svn: 254328
This commit is contained in:
parent
3a6ac9f9b5
commit
ff6da2fe89
|
@ -1043,6 +1043,18 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
|
|||
// a glue result.
|
||||
}
|
||||
|
||||
/// Lower an implicit kernel parameter whose value is known to fit in a type
/// narrower than 32 bits.
///
/// The parameter is fetched as an i32 through LowerParameter at \p Offset and
/// then wrapped in an ISD::AssertZext node carrying \p VT, which records that
/// the value is already zero-extended from \p VT.
///
/// \param DAG    The SelectionDAG the nodes are created in.
/// \param Op     The node being lowered; only used for its debug location.
/// \param VT     The narrow type the value is asserted to be zero-extended from.
/// \param Offset Passed through to LowerParameter to select the parameter.
/// \returns the i32 AssertZext node wrapping the lowered parameter.
SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
|
||||
SDValue Op,
|
||||
MVT VT,
|
||||
unsigned Offset) const {
|
||||
SDLoc SL(Op);
|
||||
SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL,
|
||||
DAG.getEntryNode(), Offset, false);
|
||||
// The bits above VT's width are known to be zero (e.g. the local size
// values are passed with VT = i16, so their high 16 bits are zero).
|
||||
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
|
||||
DAG.getValueType(VT));
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
@ -1080,19 +1092,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
|
||||
case Intrinsic::r600_read_local_size_x:
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_X, false);
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i16,
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_X);
|
||||
case Intrinsic::r600_read_local_size_y:
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i16,
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_Y);
|
||||
case Intrinsic::r600_read_local_size_z:
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
|
||||
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i16,
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_Z);
|
||||
case Intrinsic::AMDGPU_read_workdim:
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
getImplicitParameterOffset(MFI, GRID_DIM), false);
|
||||
|
||||
// The value really only needs 2 bits; i8 is the type asserted here.
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i8,
|
||||
getImplicitParameterOffset(MFI, GRID_DIM));
|
||||
case Intrinsic::r600_read_tgid_x:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
|
||||
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
|
||||
|
|
|
@ -28,6 +28,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
|||
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
|
||||
SelectionDAG &DAG) const override;
|
||||
|
||||
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
|
||||
MVT VT, unsigned Offset) const;
|
||||
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -213,6 +213,66 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_size_x_known_bits:
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
|
||||
; GCN-NOT: 0xffff
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN-NEXT: buffer_store_dword [[VVAL]]
|
||||
define void @local_size_x_known_bits(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%size = call i32 @llvm.r600.read.local.size.x() #0
|
||||
%shl = shl i32 %size, 16
|
||||
%shr = lshr i32 %shl, 16
|
||||
store i32 %shr, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_size_y_known_bits:
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
|
||||
; GCN-NOT: 0xffff
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN-NEXT: buffer_store_dword [[VVAL]]
|
||||
define void @local_size_y_known_bits(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%size = call i32 @llvm.r600.read.local.size.y() #0
|
||||
%shl = shl i32 %size, 16
|
||||
%shr = lshr i32 %shl, 16
|
||||
store i32 %shr, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_size_z_known_bits:
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
|
||||
; GCN-NOT: 0xffff
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN-NEXT: buffer_store_dword [[VVAL]]
|
||||
define void @local_size_z_known_bits(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%size = call i32 @llvm.r600.read.local.size.z() #0
|
||||
%shl = shl i32 %size, 16
|
||||
%shr = lshr i32 %shl, 16
|
||||
store i32 %shr, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}get_work_dim_known_bits:
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
|
||||
; GCN-NOT: 0xff
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @get_work_dim_known_bits(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%dim = call i32 @llvm.AMDGPU.read.workdim() #0
|
||||
%shl = shl i32 %dim, 24
|
||||
%shr = lshr i32 %shl, 24
|
||||
store i32 %shr, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.ngroups.x() #0
|
||||
declare i32 @llvm.r600.read.ngroups.y() #0
|
||||
declare i32 @llvm.r600.read.ngroups.z() #0
|
||||
|
|
Loading…
Reference in New Issue