forked from OSchip/llvm-project
AMDGPU: Add implicitarg.ptr intrinsic.
Points to the start of the implicit arguments (which are appended after the explicit arguments). Differential Revision: http://reviews.llvm.org/D20297 llvm-svn: 273317
This commit is contained in:
parent
78028b84d2
commit
fea814d531
|
@ -334,6 +334,10 @@ def int_amdgcn_kernarg_segment_ptr :
|
||||||
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
|
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
|
||||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
|
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
|
||||||
|
|
||||||
|
def int_amdgcn_implicitarg_ptr :
|
||||||
|
GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
|
||||||
|
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
|
||||||
|
|
||||||
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
|
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
|
||||||
def int_amdgcn_interp_p1 :
|
def int_amdgcn_interp_p1 :
|
||||||
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
|
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
|
||||||
|
|
|
@ -201,8 +201,9 @@ public:
|
||||||
unsigned Reg, EVT VT) const;
|
unsigned Reg, EVT VT) const;
|
||||||
|
|
||||||
enum ImplicitParameter {
|
enum ImplicitParameter {
|
||||||
GRID_DIM,
|
FIRST_IMPLICIT,
|
||||||
GRID_OFFSET
|
GRID_DIM = FIRST_IMPLICIT,
|
||||||
|
GRID_OFFSET,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \brief Helper function that returns the byte offset of the given
|
/// \brief Helper function that returns the byte offset of the given
|
||||||
|
|
|
@ -534,24 +534,29 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
|
||||||
return TargetLowering::isTypeDesirableForOp(Op, VT);
|
return TargetLowering::isTypeDesirableForOp(Op, VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
|
SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
|
||||||
const SDLoc &SL, SDValue Chain,
|
const SDLoc &SL, SDValue Chain,
|
||||||
unsigned Offset, bool Signed) const {
|
unsigned Offset) const {
|
||||||
const DataLayout &DL = DAG.getDataLayout();
|
const DataLayout &DL = DAG.getDataLayout();
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
const SIRegisterInfo *TRI =
|
const SIRegisterInfo *TRI =
|
||||||
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
|
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
|
||||||
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
|
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
|
||||||
|
|
||||||
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
|
|
||||||
|
|
||||||
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
|
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
|
||||||
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
|
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
|
||||||
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
|
|
||||||
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
|
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
|
||||||
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
|
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
|
||||||
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
|
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
|
||||||
DAG.getConstant(Offset, SL, PtrVT));
|
DAG.getConstant(Offset, SL, PtrVT));
|
||||||
|
}
|
||||||
|
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
|
||||||
|
const SDLoc &SL, SDValue Chain,
|
||||||
|
unsigned Offset, bool Signed) const {
|
||||||
|
const DataLayout &DL = DAG.getDataLayout();
|
||||||
|
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
|
||||||
|
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
|
||||||
|
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
|
||||||
SDValue PtrOffset = DAG.getUNDEF(PtrVT);
|
SDValue PtrOffset = DAG.getUNDEF(PtrVT);
|
||||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
|
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
|
||||||
|
|
||||||
|
@ -561,6 +566,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
|
||||||
if (MemVT.isFloatingPoint())
|
if (MemVT.isFloatingPoint())
|
||||||
ExtTy = ISD::EXTLOAD;
|
ExtTy = ISD::EXTLOAD;
|
||||||
|
|
||||||
|
SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
|
||||||
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
|
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
|
||||||
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
|
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
|
||||||
false, // isVolatile
|
false, // isVolatile
|
||||||
|
@ -1540,6 +1546,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
|
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
|
||||||
TRI->getPreloadedValue(MF, Reg), VT);
|
TRI->getPreloadedValue(MF, Reg), VT);
|
||||||
}
|
}
|
||||||
|
case Intrinsic::amdgcn_implicitarg_ptr: {
|
||||||
|
unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
|
||||||
|
return LowerParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
|
||||||
|
}
|
||||||
case Intrinsic::amdgcn_kernarg_segment_ptr: {
|
case Intrinsic::amdgcn_kernarg_segment_ptr: {
|
||||||
unsigned Reg
|
unsigned Reg
|
||||||
= TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
|
= TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
|
||||||
|
|
|
@ -21,7 +21,9 @@
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
class SITargetLowering final : public AMDGPUTargetLowering {
|
class SITargetLowering final : public AMDGPUTargetLowering {
|
||||||
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &DL,
|
SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
|
||||||
|
unsigned Offset) const;
|
||||||
|
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
|
||||||
SDValue Chain, unsigned Offset, bool Signed) const;
|
SDValue Chain, unsigned Offset, bool Signed) const;
|
||||||
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
|
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
|
||||||
SelectionDAG &DAG) const override;
|
SelectionDAG &DAG) const override;
|
||||||
|
|
|
@ -15,7 +15,20 @@ define void @test(i32 addrspace(1)* %out) #1 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; ALL-LABEL: {{^}}test_implicit:
|
||||||
|
; Offset in dwords: 10 (GEP index) + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15
|
||||||
|
; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
|
||||||
|
define void @test_implicit(i32 addrspace(1)* %out) #1 {
|
||||||
|
%implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
|
||||||
|
%header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
|
||||||
|
%gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
|
||||||
|
%value = load i32, i32 addrspace(2)* %gep
|
||||||
|
store i32 %value, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
|
||||||
|
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
|
||||||
|
|
||||||
attributes #0 = { nounwind readnone }
|
attributes #0 = { nounwind readnone }
|
||||||
attributes #1 = { nounwind }
|
attributes #1 = { nounwind }
|
||||||
|
|
Loading…
Reference in New Issue