Fix invalid addrspacecast due to combining alloca with global var
For function-scope variables with a large initialisation list, the front end
(FE) usually generates a global variable to hold the initializer, then
generates a memcpy intrinsic to initialize the alloca.
InstCombiner::visitAllocaInst identifies such allocas that are only ever read
and replaces them with the global variable. This is done by casting the global
variable to the type of the alloca and replacing all references.
However, when the global variable is in a different address space that is
disjoint from addr space 0 (e.g. in IR generated from OpenCL, where a global
variable cannot be in the private addr space, i.e. addr space 0), casting
the global variable to addr space 0 results in invalid IR for certain
targets (e.g. amdgpu).
To fix this issue, when the global variable is not in addr space 0,
instead of casting it to addr space 0, this patch follows the uses of the
alloca until it reaches the load instructions, then replaces each load from
the alloca with a load from the global variable. If bitcasts or GEPs are
encountered along the way, new bitcasts and GEPs based on the global
variable are generated and used in the load instructions.
Differential Revision: https://reviews.llvm.org/D27283
llvm-svn: 294786
2017-02-11 05:46:07 +08:00
|
|
|
; Regression test for InstCombine's replacement of a memcpy-initialised alloca
; by its constant source when that source is in a non-zero address space
; (addrspace(2) in this file).
; RUN: opt < %s -instcombine -S | FileCheck %s
|
|
|
|
|
|
|
|
; Constant [8 x i32] initialiser stored in addrspace(2). Every function in
; this file copies it into a private alloca with llvm.memcpy.
@test.data = private unnamed_addr addrspace(2) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 4
|
|
|
|
|
|
|
|
; test_load: the private array is filled by a memcpy from @test.data and is
; afterwards only read through a GEP followed by a load. The expected result
; is a GEP and load that address @test.data directly in addrspace(2), with no
; alloca, no memcpy and no addrspacecast left in the function.
;
; The label is anchored on "@test_load(" because the bare name is also a
; prefix of @test_load_bitcast_chain and @test_load_and_call.
;
; The negative checks are placed ahead of the GEP check as well as after the
; load check: a surviving alloca or memcpy would be printed before its users,
; i.e. before the GEP, which trailing negative checks alone never scan.
; CHECK-LABEL: @test_load(
; CHECK-NOT: alloca
; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
; CHECK-NOT: addrspacecast
; CHECK-NOT: load i32, i32*
; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x
; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]]
; CHECK-NOT: alloca
; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
; CHECK-NOT: addrspacecast
; CHECK-NOT: load i32, i32*
define void @test_load(i32 addrspace(1)* %out, i64 %x) {
entry:
  ; Private copy of the constant table.
  %data = alloca [8 x i32], align 4
  %0 = bitcast [8 x i32]* %data to i8*
  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
  ; The only other use of the copy: read element %x.
  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
  %1 = load i32, i32* %arrayidx, align 4
  ; Write the loaded value to out[x] in addrspace(1).
  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
  ret void
}
|
|
|
|
|
2017-02-25 04:27:25 +08:00
|
|
|
; test_load_bitcast_chain: same scenario as a plain read-only copy of
; @test.data, except that the loaded address is derived through a chain of
; bitcasts ([8 x i32]* -> i8* -> i32*) before the GEP. The expected result is
; still a GEP and load on @test.data in addrspace(2), with no alloca, memcpy
; or addrspacecast left behind.
;
; The negative checks are placed ahead of the GEP check as well as after the
; load check: a surviving alloca or memcpy would be printed before its users,
; i.e. before the GEP, which trailing negative checks alone never scan.
; CHECK-LABEL: @test_load_bitcast_chain(
; CHECK-NOT: alloca
; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
; CHECK-NOT: addrspacecast
; CHECK-NOT: load i32, i32*
; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x
; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]]
; CHECK-NOT: alloca
; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
; CHECK-NOT: addrspacecast
; CHECK-NOT: load i32, i32*
define void @test_load_bitcast_chain(i32 addrspace(1)* %out, i64 %x) {
entry:
  ; Private copy of the constant table.
  %data = alloca [8 x i32], align 4
  %0 = bitcast [8 x i32]* %data to i8*
  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
  ; Second bitcast in the chain: the i8* view is reinterpreted as i32*.
  %1 = bitcast i8* %0 to i32*
  %arrayidx = getelementptr inbounds i32, i32* %1, i64 %x
  %2 = load i32, i32* %arrayidx, align 4
  ; Write the loaded value to out[x] in addrspace(1).
  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
  store i32 %2, i32 addrspace(1)* %arrayidx1, align 4
  ret void
}
|
|
|
|
|
Fix invalid addrspacecast due to combining alloca with global var
For function-scope variables with a large initialisation list, the front end
(FE) usually generates a global variable to hold the initializer, then
generates a memcpy intrinsic to initialize the alloca.
InstCombiner::visitAllocaInst identifies such allocas that are only ever read
and replaces them with the global variable. This is done by casting the global
variable to the type of the alloca and replacing all references.
However, when the global variable is in a different address space that is
disjoint from addr space 0 (e.g. in IR generated from OpenCL, where a global
variable cannot be in the private addr space, i.e. addr space 0), casting
the global variable to addr space 0 results in invalid IR for certain
targets (e.g. amdgpu).
To fix this issue, when the global variable is not in addr space 0,
instead of casting it to addr space 0, this patch follows the uses of the
alloca until it reaches the load instructions, then replaces each load from
the alloca with a load from the global variable. If bitcasts or GEPs are
encountered along the way, new bitcasts and GEPs based on the global
variable are generated and used in the load instructions.
Differential Revision: https://reviews.llvm.org/D27283
llvm-svn: 294786
2017-02-11 05:46:07 +08:00
|
|
|
; test_call: the element pointer taken from the private copy is handed to
; @foo instead of being loaded from. The checks below require the alloca and
; the memcpy to still be present, no addrspacecast between the memcpy and the
; call, and @foo to still receive a plain i32* value.
; CHECK-LABEL: test_call
; CHECK: alloca
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
; CHECK-NOT: addrspacecast
; CHECK: call i32 @foo(i32* %{{.*}})
define void @test_call(i32 addrspace(1)* %out, i64 %x) {
entry:
  ; Private copy of the constant table.
  %buf = alloca [8 x i32], align 4
  %buf.i8 = bitcast [8 x i32]* %buf to i8*
  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %buf.i8, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
  ; Address of element %x, passed to the callee as an i32*.
  %elt = getelementptr inbounds [8 x i32], [8 x i32]* %buf, i64 0, i64 %x
  %res = call i32 @foo(i32* %elt)
  ; Write the call result to out[x] in addrspace(1).
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
  store i32 %res, i32 addrspace(1)* %dst, align 4
  ret void
}
|
|
|
|
|
|
|
|
; test_load_and_call: the same element pointer into the private copy is both
; loaded from and passed to @foo. The checks below require, in this order, an
; alloca, the memcpy, a load through a plain i32*, and the call to @foo; after
; the call there must be no addrspacecast and no load through an addrspace(2)
; pointer.
; CHECK-LABEL: test_load_and_call
; CHECK: alloca
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
; CHECK: load i32, i32* %{{.*}}
; CHECK: call i32 @foo(i32* %{{.*}})
; CHECK-NOT: addrspacecast
; CHECK-NOT: load i32, i32 addrspace(2)*
define void @test_load_and_call(i32 addrspace(1)* %out, i64 %x, i64 %y) {
entry:
  ; Private copy of the constant table.
  %buf = alloca [8 x i32], align 4
  %buf.i8 = bitcast [8 x i32]* %buf to i8*
  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %buf.i8, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
  ; First use of the element pointer: read element %x and store it to out[x].
  %elt = getelementptr inbounds [8 x i32], [8 x i32]* %buf, i64 0, i64 %x
  %val = load i32, i32* %elt, align 4
  %dst.x = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
  store i32 %val, i32 addrspace(1)* %dst.x, align 4
  ; Second use: pass the same pointer to @foo and store its result to out[y].
  %res = call i32 @foo(i32* %elt)
  %dst.y = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %y
  store i32 %res, i32 addrspace(1)* %dst.y, align 4
  ret void
}
|
|
|
|
|
|
|
|
|
|
|
|
; memcpy overload with an addrspace(0) i8* destination, an addrspace(2) i8*
; source and an i64 length. Every call in this file passes 4 and false for the
; trailing i32 and i1 operands.
declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i32, i1)
|
|
|
|
; External function taking an i32* and returning i32. test_call and
; test_load_and_call pass it a pointer into the private copy, so the copy has
; a use that is not a load.
declare i32 @foo(i32* %x)
|