AMDGPU/GFX9: Fix shared and private aperture queries

Differential Revision: https://reviews.llvm.org/D31786

llvm-svn: 299727
This commit is contained in:
Konstantin Zhuravlyov 2017-04-06 23:02:33 +00:00
parent 380611addc
commit 4b3847e865
4 changed files with 49 additions and 16 deletions

View File

@ -248,6 +248,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_LDS_ALLOC = 6,
ID_IB_STS = 7,
ID_SYMBOLIC_LAST_ = 8,
ID_MEM_BASES = 15,
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
@ -257,14 +258,20 @@ enum Offset { // Offset, (5) [10:6]
OFFSET_DEFAULT_ = 0,
OFFSET_SHIFT_ = 6,
OFFSET_WIDTH_ = 5,
OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_)
OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
OFFSET_SRC_SHARED_BASE = 16,
OFFSET_SRC_PRIVATE_BASE = 0
};
enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
WIDTH_M1_DEFAULT_ = 31,
WIDTH_M1_SHIFT_ = 11,
WIDTH_M1_WIDTH_ = 5,
WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_)
WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),
WIDTH_M1_SRC_SHARED_BASE = 15,
WIDTH_M1_SRC_PRIVATE_BASE = 15
};
} // namespace Hwreg

View File

@ -2340,16 +2340,28 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
}
SDValue SITargetLowering::getSegmentAperture(unsigned AS,
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
unsigned Encoding =
AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
AMDGPU::SRC_PRIVATE_BASE;
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
SDValue ApertureReg = SDValue(
DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
}
SDLoc SL;
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
@ -2362,8 +2374,8 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
DAG.getConstant(StructOffset, SL, MVT::i64));
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, QueuePtr,
DAG.getConstant(StructOffset, DL, MVT::i64));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
@ -2372,7 +2384,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
AMDGPUASI.CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
MinAlign(64, StructOffset),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
@ -2417,7 +2429,7 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), DAG);
SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
SDValue CvtPtr
= DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);

View File

@ -58,7 +58,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

View File

@ -12,7 +12,11 @@
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]
; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
@ -45,7 +49,11 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(15, 0, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]]
; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
@ -153,7 +161,11 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)*
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]
; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}