forked from OSchip/llvm-project
[SROA] Fix APInt size when load/store have different address space
Currently there is a bug in SROA::presplitLoadsAndStores which causes an assertion failure in GEPOperator::accumulateConstantOffset. It does not consider that the pointer operand of a load or store may be in a non-zero address space, whose pointer size may differ from the size of a pointer in address space 0. This patch fixes the assertion failure seen when compiling Blender Cycles kernels for the amdgpu backend. Differential Revision: https://reviews.llvm.org/D33298 llvm-svn: 305107
This commit is contained in:
parent
bd336e44d8
commit
6455b0dbf3
|
@ -3626,10 +3626,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
|||
auto *PartPtrTy =
|
||||
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
|
||||
|
||||
auto AS = SI->getPointerAddressSpace();
|
||||
StoreInst *PStore = IRB.CreateAlignedStore(
|
||||
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
|
||||
APInt(DL.getPointerSizeInBits(), PartOffset),
|
||||
PartPtrTy, StoreBasePtr->getName() + "."),
|
||||
PLoad,
|
||||
getAdjustedPtr(IRB, DL, StoreBasePtr,
|
||||
APInt(DL.getPointerSizeInBits(AS), PartOffset),
|
||||
PartPtrTy, StoreBasePtr->getName() + "."),
|
||||
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
|
||||
PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
|
||||
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
|
||||
|
@ -3707,9 +3709,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
|||
PLoad = (*SplitLoads)[Idx];
|
||||
} else {
|
||||
IRB.SetInsertPoint(LI);
|
||||
auto AS = LI->getPointerAddressSpace();
|
||||
PLoad = IRB.CreateAlignedLoad(
|
||||
getAdjustedPtr(IRB, DL, LoadBasePtr,
|
||||
APInt(DL.getPointerSizeInBits(), PartOffset),
|
||||
APInt(DL.getPointerSizeInBits(AS), PartOffset),
|
||||
LoadPartPtrTy, LoadBasePtr->getName() + "."),
|
||||
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
|
||||
LI->getName());
|
||||
|
@ -3717,10 +3720,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
|||
|
||||
// And store this partition.
|
||||
IRB.SetInsertPoint(SI);
|
||||
auto AS = SI->getPointerAddressSpace();
|
||||
StoreInst *PStore = IRB.CreateAlignedStore(
|
||||
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
|
||||
APInt(DL.getPointerSizeInBits(), PartOffset),
|
||||
StorePartPtrTy, StoreBasePtr->getName() + "."),
|
||||
PLoad,
|
||||
getAdjustedPtr(IRB, DL, StoreBasePtr,
|
||||
APInt(DL.getPointerSizeInBits(AS), PartOffset),
|
||||
StorePartPtrTy, StoreBasePtr->getName() + "."),
|
||||
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
|
||||
|
||||
// Now build a new slice for the alloca.
|
||||
|
|
|
@ -101,3 +101,31 @@ entry:
|
|||
%ret = fadd float %f1, %f2
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; Test load from and store to non-zero address space.
|
||||
define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
|
||||
; CHECK-LABEL: @test_load_store_diff_addr_space
|
||||
; CHECK-NOT: alloca
|
||||
; CHECK: load i32, i32 addrspace(1)*
|
||||
; CHECK: load i32, i32 addrspace(1)*
|
||||
; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
|
||||
; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
|
||||
%a = alloca i64
|
||||
%a.cast = bitcast i64* %a to [2 x float]*
|
||||
%a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
|
||||
%a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
|
||||
%complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
|
||||
%p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
|
||||
%v1 = load i64, i64 addrspace(1)* %p1
|
||||
store i64 %v1, i64* %a
|
||||
%f1 = load float, float* %a.gep1
|
||||
%f2 = load float, float* %a.gep2
|
||||
%sum = fadd float %f1, %f2
|
||||
store float %sum, float* %a.gep1
|
||||
store float %sum, float* %a.gep2
|
||||
%v2 = load i64, i64* %a
|
||||
%complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
|
||||
%p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
|
||||
store i64 %v2, i64 addrspace(1)* %p2
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue