CodeGen: Let frame index value type match alloca addr space

Recently alloca address space has been added to data layout. Due to this
change, pointer returned by alloca may have different size as pointer in
address space 0.

However, currently the value type of frame index is assumed to be of the
same size as pointer in address space 0.

This patch fixes that.

Most targets assume alloca returning pointer in address space 0, which
is the default alloca address space. Therefore it is NFC for them.

AMDGCN target with amdgiz environment requires this change since it
assumes alloca returning pointer to addr space 5 and its size is 32,
which is different from the size of pointer in addr space 0 which is 64.

Differential Revision: https://reviews.llvm.org/D32021

llvm-svn: 300864
This commit is contained in:
Yaxun Liu 2017-04-20 18:15:34 +00:00
parent 62731e1c89
commit 5d977f8ed4
4 changed files with 68 additions and 7 deletions

View File

@ -230,6 +230,12 @@ public:
return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
}
/// Return the type for frame index, which is determined by
/// the alloca address space specified through the data layout.
MVT getFrameIndexTy(const DataLayout &DL) const {
return getPointerTy(DL, DL.getAllocaAddrSpace());
}
/// EVT is not used in-tree, but is used by out-of-tree target.
/// A documentation for this function would be nice...
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;

View File

@ -1826,7 +1826,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
@ -1839,7 +1839,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,

View File

@ -1151,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
TLI.getPointerTy(DAG.getDataLayout()));
TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
@ -5617,7 +5617,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] =
DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
@ -6630,7 +6630,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
unsigned Align = DL.getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL));
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI));
OpInfo.CallOperand = StackSlot;
@ -7393,7 +7393,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
Ops.push_back(Builder.DAG.getTargetFrameIndex(
FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
} else
Ops.push_back(OpVal);
}
@ -7661,7 +7661,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;

View File

@ -0,0 +1,55 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
;
; The original OpenCL kernel:
; kernel void f(global int *a, int i, int j) {
; int x[100];
; x[i] = 7;
; a[0] = x[j];
; }
; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o -
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
target triple = "amdgcn---amdgiz"
define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 {
entry:
; CHECK: s_load_dword s2, s[0:1], 0xb
; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
; CHECK: s_load_dword s0, s[0:1], 0xc
; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; CHECK: s_mov_b32 s10, -1
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK: s_lshl_b32 s1, s2, 2
; CHECK: v_mov_b32_e32 v0, 4
; CHECK: s_mov_b32 s11, 0xe8f000
; CHECK: v_add_i32_e32 v1, vcc, s1, v0
; CHECK: v_mov_b32_e32 v2, 7
; CHECK: s_lshl_b32 s0, s0, 2
; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen
; CHECK: v_add_i32_e32 v0, vcc, s0, v0
; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen
; CHECK: s_mov_b32 s7, 0xf000
; CHECK: s_mov_b32 s6, -1
; CHECK: s_waitcnt vmcnt(0)
; CHECK: buffer_store_dword v0, off, s[4:7], 0
; CHECK: s_endpgm
%x = alloca [100 x i32], align 4, addrspace(5)
%0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)*
call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0
%arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i
store i32 7, i32 addrspace(5)* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j
%1 = load i32, i32 addrspace(5)* %arrayidx2, align 4
store i32 %1, i32 addrspace(1)* %a, align 4
call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0
ret void
}
declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }