forked from OSchip/llvm-project
CodeGen: Let frame index value type match alloca addr space
Recently, an alloca address space was added to the data layout. Due to this change, the pointer returned by alloca may have a different size than a pointer in address space 0. However, the value type of a frame index is currently assumed to be the same size as a pointer in address space 0. This patch fixes that. Most targets assume that alloca returns a pointer in address space 0, which is the default alloca address space; therefore this patch is NFC (no functional change) for them. The AMDGCN target with the amdgiz environment requires this change, since it assumes that alloca returns a pointer to address space 5, whose size is 32 bits, which differs from the 64-bit size of a pointer in address space 0. Differential Revision: https://reviews.llvm.org/D32021 llvm-svn: 300864
This commit is contained in:
parent
62731e1c89
commit
5d977f8ed4
|
@ -230,6 +230,12 @@ public:
|
||||||
return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
|
return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the type for frame index, which is determined by
|
||||||
|
/// the alloca address space specified through the data layout.
|
||||||
|
MVT getFrameIndexTy(const DataLayout &DL) const {
|
||||||
|
return getPointerTy(DL, DL.getAllocaAddrSpace());
|
||||||
|
}
|
||||||
|
|
||||||
/// EVT is not used in-tree, but is used by out-of-tree target.
|
/// EVT is not used in-tree, but is used by out-of-tree target.
|
||||||
/// A documentation for this function would be nice...
|
/// A documentation for this function would be nice...
|
||||||
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
|
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
|
||||||
|
|
|
@ -1826,7 +1826,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
|
||||||
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
|
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
|
||||||
|
|
||||||
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
|
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
|
||||||
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
|
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
|
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
|
||||||
|
@ -1839,7 +1839,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
|
||||||
|
|
||||||
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
|
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
|
||||||
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
|
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
|
||||||
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
|
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
|
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
|
||||||
|
|
|
@ -1151,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
|
||||||
FuncInfo.StaticAllocaMap.find(AI);
|
FuncInfo.StaticAllocaMap.find(AI);
|
||||||
if (SI != FuncInfo.StaticAllocaMap.end())
|
if (SI != FuncInfo.StaticAllocaMap.end())
|
||||||
return DAG.getFrameIndex(SI->second,
|
return DAG.getFrameIndex(SI->second,
|
||||||
TLI.getPointerTy(DAG.getDataLayout()));
|
TLI.getFrameIndexTy(DAG.getDataLayout()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this is an instruction which fast-isel has deferred, select it now.
|
// If this is an instruction which fast-isel has deferred, select it now.
|
||||||
|
@ -5617,7 +5617,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||||
SDValue Ops[2];
|
SDValue Ops[2];
|
||||||
Ops[0] = getRoot();
|
Ops[0] = getRoot();
|
||||||
Ops[1] =
|
Ops[1] =
|
||||||
DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
|
DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
|
||||||
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
|
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
|
||||||
|
|
||||||
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
|
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
|
||||||
|
@ -6630,7 +6630,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
|
||||||
unsigned Align = DL.getPrefTypeAlignment(Ty);
|
unsigned Align = DL.getPrefTypeAlignment(Ty);
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
|
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
|
||||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL));
|
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
|
||||||
Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
|
Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
|
||||||
MachinePointerInfo::getFixedStack(MF, SSFI));
|
MachinePointerInfo::getFixedStack(MF, SSFI));
|
||||||
OpInfo.CallOperand = StackSlot;
|
OpInfo.CallOperand = StackSlot;
|
||||||
|
@ -7393,7 +7393,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
|
||||||
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
|
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
|
||||||
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
|
||||||
Ops.push_back(Builder.DAG.getTargetFrameIndex(
|
Ops.push_back(Builder.DAG.getTargetFrameIndex(
|
||||||
FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
|
FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
|
||||||
} else
|
} else
|
||||||
Ops.push_back(OpVal);
|
Ops.push_back(OpVal);
|
||||||
}
|
}
|
||||||
|
@ -7661,7 +7661,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
||||||
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
|
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
|
||||||
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
|
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
|
||||||
|
|
||||||
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
|
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
|
||||||
ArgListEntry Entry;
|
ArgListEntry Entry;
|
||||||
Entry.Node = DemoteStackSlot;
|
Entry.Node = DemoteStackSlot;
|
||||||
Entry.Ty = StackSlotPtrType;
|
Entry.Ty = StackSlotPtrType;
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||||
|
;
|
||||||
|
; The original OpenCL kernel:
|
||||||
|
; kernel void f(global int *a, int i, int j) {
|
||||||
|
; int x[100];
|
||||||
|
; x[i] = 7;
|
||||||
|
; a[0] = x[j];
|
||||||
|
; }
|
||||||
|
; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o -
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||||
|
target triple = "amdgcn---amdgiz"
|
||||||
|
|
||||||
|
define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK: s_load_dword s2, s[0:1], 0xb
|
||||||
|
; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
||||||
|
; CHECK: s_load_dword s0, s[0:1], 0xc
|
||||||
|
; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||||
|
; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||||
|
; CHECK: s_mov_b32 s10, -1
|
||||||
|
; CHECK: s_waitcnt lgkmcnt(0)
|
||||||
|
; CHECK: s_lshl_b32 s1, s2, 2
|
||||||
|
; CHECK: v_mov_b32_e32 v0, 4
|
||||||
|
; CHECK: s_mov_b32 s11, 0xe8f000
|
||||||
|
; CHECK: v_add_i32_e32 v1, vcc, s1, v0
|
||||||
|
; CHECK: v_mov_b32_e32 v2, 7
|
||||||
|
; CHECK: s_lshl_b32 s0, s0, 2
|
||||||
|
; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen
|
||||||
|
; CHECK: v_add_i32_e32 v0, vcc, s0, v0
|
||||||
|
; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen
|
||||||
|
; CHECK: s_mov_b32 s7, 0xf000
|
||||||
|
; CHECK: s_mov_b32 s6, -1
|
||||||
|
; CHECK: s_waitcnt vmcnt(0)
|
||||||
|
; CHECK: buffer_store_dword v0, off, s[4:7], 0
|
||||||
|
; CHECK: s_endpgm
|
||||||
|
|
||||||
|
%x = alloca [100 x i32], align 4, addrspace(5)
|
||||||
|
%0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)*
|
||||||
|
call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0
|
||||||
|
%arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i
|
||||||
|
store i32 7, i32 addrspace(5)* %arrayidx, align 4
|
||||||
|
%arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j
|
||||||
|
%1 = load i32, i32 addrspace(5)* %arrayidx2, align 4
|
||||||
|
store i32 %1, i32 addrspace(1)* %a, align 4
|
||||||
|
call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
|
||||||
|
|
||||||
|
declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
||||||
|
attributes #1 = { argmemonly nounwind }
|
Loading…
Reference in New Issue