diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 328d5dce273e..24039ea10816 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -230,6 +230,12 @@ public: return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); } + /// Return the type for frame index, which is determined by + /// the alloca address space specified through the data layout. + MVT getFrameIndexTy(const DataLayout &DL) const { + return getPointerTy(DL, DL.getAllocaAddrSpace()); + } + /// EVT is not used in-tree, but is used by out-of-tree target. /// A documentation for this function would be nice... virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2627264d0784..ad169d33fd13 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1826,7 +1826,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { @@ -1839,7 +1839,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 
5d1992068dd5..2c58953ee908 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1151,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. @@ -5617,7 +5617,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -6630,7 +6630,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; @@ -7393,7 +7393,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -7661,7 +7661,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const 
{ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll b/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll new file mode 100644 index 000000000000..dd46403b68af --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/frame-index-amdgiz.ll @@ -0,0 +1,55 @@ +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; +; The original OpenCL kernel: +; kernel void f(global int *a, int i, int j) { +; int x[100]; +; x[i] = 7; +; a[0] = x[j]; +; } +; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn---amdgiz" + +define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 { +entry: +; CHECK: s_load_dword s2, s[0:1], 0xb +; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CHECK: s_load_dword s0, s[0:1], 0xc +; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; CHECK: s_mov_b32 s10, -1 +; CHECK: s_waitcnt lgkmcnt(0) +; CHECK: s_lshl_b32 s1, s2, 2 +; CHECK: v_mov_b32_e32 v0, 4 +; CHECK: s_mov_b32 s11, 0xe8f000 +; CHECK: v_add_i32_e32 v1, vcc, s1, v0 +; CHECK: v_mov_b32_e32 v2, 7 +; CHECK: s_lshl_b32 s0, s0, 2 +; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen +; CHECK: v_add_i32_e32 v0, vcc, s0, v0 +; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen +; CHECK: s_mov_b32 s7, 0xf000 +; CHECK: s_mov_b32 s6, -1 +; CHECK: s_waitcnt vmcnt(0) +; CHECK: buffer_store_dword v0, off, s[4:7], 0 +; CHECK: s_endpgm + + %x = alloca [100 x i32], align 4, 
addrspace(5) + %0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)* + call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + %arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i + store i32 7, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j + %1 = load i32, i32 addrspace(5)* %arrayidx2, align 4 + store i32 %1, i32 addrspace(1)* %a, align 4 + call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + ret void +} + +declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind }