forked from OSchip/llvm-project
AMDGPU/GFX9: Fix shared and private aperture queries
Differential Revision: https://reviews.llvm.org/D31786
llvm-svn: 299727
This commit is contained in:
parent 380611addc
commit 4b3847e865
|
@ -248,6 +248,7 @@ enum Id { // HwRegCode, (6) [5:0]
|
|||
ID_LDS_ALLOC = 6,
|
||||
ID_IB_STS = 7,
|
||||
ID_SYMBOLIC_LAST_ = 8,
|
||||
ID_MEM_BASES = 15,
|
||||
ID_SHIFT_ = 0,
|
||||
ID_WIDTH_ = 6,
|
||||
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
|
||||
|
@ -257,14 +258,20 @@ enum Offset { // Offset, (5) [10:6]
|
|||
OFFSET_DEFAULT_ = 0,
|
||||
OFFSET_SHIFT_ = 6,
|
||||
OFFSET_WIDTH_ = 5,
|
||||
OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_)
|
||||
OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
|
||||
|
||||
OFFSET_SRC_SHARED_BASE = 16,
|
||||
OFFSET_SRC_PRIVATE_BASE = 0
|
||||
};
|
||||
|
||||
enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
|
||||
WIDTH_M1_DEFAULT_ = 31,
|
||||
WIDTH_M1_SHIFT_ = 11,
|
||||
WIDTH_M1_WIDTH_ = 5,
|
||||
WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_)
|
||||
WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),
|
||||
|
||||
WIDTH_M1_SRC_SHARED_BASE = 15,
|
||||
WIDTH_M1_SRC_PRIVATE_BASE = 15
|
||||
};
|
||||
|
||||
} // namespace Hwreg
|
||||
|
|
|
@ -2340,16 +2340,28 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
|
|||
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::getSegmentAperture(unsigned AS,
|
||||
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
|
||||
SelectionDAG &DAG) const {
|
||||
// FIXME: Use inline constants (src_{shared, private}_base) instead.
|
||||
if (Subtarget->hasApertureRegs()) {
|
||||
unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
|
||||
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
|
||||
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
|
||||
unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
|
||||
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
|
||||
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
|
||||
unsigned Encoding =
|
||||
AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
|
||||
Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
|
||||
WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
|
||||
|
||||
if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
|
||||
unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
|
||||
AMDGPU::SRC_PRIVATE_BASE;
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
|
||||
SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
|
||||
SDValue ApertureReg = SDValue(
|
||||
DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
|
||||
SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
|
||||
return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
|
||||
}
|
||||
|
||||
SDLoc SL;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
|
||||
|
@ -2362,8 +2374,8 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
|
|||
// private_segment_aperture_base_hi.
|
||||
uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
|
||||
|
||||
SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
|
||||
DAG.getConstant(StructOffset, SL, MVT::i64));
|
||||
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, QueuePtr,
|
||||
DAG.getConstant(StructOffset, DL, MVT::i64));
|
||||
|
||||
// TODO: Use custom target PseudoSourceValue.
|
||||
// TODO: We should use the value from the IR intrinsic call, but it might not
|
||||
|
@ -2372,7 +2384,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
|
|||
AMDGPUASI.CONSTANT_ADDRESS));
|
||||
|
||||
MachinePointerInfo PtrInfo(V, StructOffset);
|
||||
return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
|
||||
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
|
||||
MinAlign(64, StructOffset),
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOInvariant);
|
||||
|
@ -2417,7 +2429,7 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
|
|||
SDValue NonNull
|
||||
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
|
||||
|
||||
SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), DAG);
|
||||
SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
|
||||
SDValue CvtPtr
|
||||
= DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
|
||||
|
||||
|
|
|
@ -58,7 +58,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
|
|||
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
|
||||
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue getSegmentAperture(unsigned AS, SelectionDAG &DAG) const;
|
||||
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -12,7 +12,11 @@
|
|||
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
|
||||
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
|
||||
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
|
||||
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16)
|
||||
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
|
||||
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]
|
||||
|
||||
; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
|
||||
|
@ -45,7 +49,11 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt
|
|||
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
|
||||
|
||||
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
|
||||
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
|
||||
; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(15, 0, 16)
|
||||
; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16
|
||||
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]]
|
||||
|
||||
; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
|
||||
|
@ -153,7 +161,11 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)*
|
|||
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
|
||||
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
|
||||
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
|
||||
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
|
||||
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16)
|
||||
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
|
||||
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]
|
||||
|
||||
; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
|
||||
|
||||
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
|
|
Loading…
Reference in New Issue