forked from OSchip/llvm-project
AMDGPU: Fix using old address spaces in some tests
llvm-svn: 348385
This commit is contained in:
parent
ac52954703
commit
b3e14de487
|
@@ -1,45 +1,46 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
|
||||
|
||||
; Nothing should be done if the addrspacecast is captured.
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
declare void @consume_ptr2int(i32) #0
|
||||
|
||||
; CHECK-LABEL: @addrspacecast_captured(
|
||||
; CHECK: %data = alloca i32, align 4
|
||||
; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)*
|
||||
; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32
|
||||
; CHECK: %data = alloca i32, align 4, addrspace(5)
|
||||
; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32*
|
||||
; CHECK: %ptr2int = ptrtoint i32* %cast to i32
|
||||
; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out
|
||||
define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%data = alloca i32, align 4
|
||||
%cast = addrspacecast i32* %data to i32 addrspace(4)*
|
||||
%ptr2int = ptrtoint i32 addrspace(4)* %cast to i32
|
||||
%data = alloca i32, align 4, addrspace(5)
|
||||
%cast = addrspacecast i32 addrspace(5)* %data to i32*
|
||||
%ptr2int = ptrtoint i32* %cast to i32
|
||||
store i32 %ptr2int, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @addrspacecast_captured_store(
|
||||
; CHECK: %data = alloca i32, align 4
|
||||
; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)*
|
||||
; CHECK: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out
|
||||
define amdgpu_kernel void @addrspacecast_captured_store(i32 addrspace(4)* addrspace(1)* %out) #0 {
|
||||
; CHECK: %data = alloca i32, align 4, addrspace(5)
|
||||
; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32*
|
||||
; CHECK: store i32* %cast, i32* addrspace(1)* %out
|
||||
define amdgpu_kernel void @addrspacecast_captured_store(i32* addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%data = alloca i32, align 4
|
||||
%cast = addrspacecast i32* %data to i32 addrspace(4)*
|
||||
store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out
|
||||
%data = alloca i32, align 4, addrspace(5)
|
||||
%cast = addrspacecast i32 addrspace(5)* %data to i32*
|
||||
store i32* %cast, i32* addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @addrspacecast_captured_call(
|
||||
; CHECK: %data = alloca i32, align 4
|
||||
; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)*
|
||||
; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32
|
||||
; CHECK: %data = alloca i32, align 4, addrspace(5)
|
||||
; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32*
|
||||
; CHECK: %ptr2int = ptrtoint i32* %cast to i32
|
||||
; CHECK: call void @consume_ptr2int(i32 %ptr2int)
|
||||
define amdgpu_kernel void @addrspacecast_captured_call() #0 {
|
||||
entry:
|
||||
%data = alloca i32, align 4
|
||||
%cast = addrspacecast i32* %data to i32 addrspace(4)*
|
||||
%ptr2int = ptrtoint i32 addrspace(4)* %cast to i32
|
||||
%data = alloca i32, align 4, addrspace(5)
|
||||
%cast = addrspacecast i32 addrspace(5)* %data to i32*
|
||||
%ptr2int = ptrtoint i32* %cast to i32
|
||||
call void @consume_ptr2int(i32 %ptr2int)
|
||||
ret void
|
||||
}
|
||||
|
|
|
@@ -1,12 +1,14 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
|
||||
; CHECK: %alloca = alloca i32
|
||||
; CHECK: select i1 undef, i32* undef, i32* %alloca
|
||||
; CHECK: select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %alloca
|
||||
define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
|
||||
%alloca = alloca i32, align 4
|
||||
%select = select i1 undef, i32* undef, i32* %alloca
|
||||
store i32 0, i32* %select, align 4
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
%select = select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %alloca
|
||||
store i32 0, i32 addrspace(5)* %select, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@@ -17,11 +19,11 @@ define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand()
|
|||
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
|
||||
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
|
||||
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
|
||||
%alloca = alloca [16 x i32], align 4
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
|
||||
%select = select i1 undef, i32* %ptr0, i32* %ptr1
|
||||
store i32 0, i32* %select, align 4
|
||||
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
|
||||
%select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
|
||||
store i32 0, i32 addrspace(5)* %select, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@@ -30,16 +32,16 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a
|
|||
; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
|
||||
; CHECK: %alloca0 = alloca i32, i32 16, align 4
|
||||
; CHECK: %alloca1 = alloca i32, i32 16, align 4
|
||||
; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
|
||||
; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
|
||||
; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
|
||||
; CHECK: %ptr0 = getelementptr inbounds i32, i32 addrspace(5)* %alloca0, i32 %a
|
||||
; CHECK: %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %alloca1, i32 %b
|
||||
; CHECK: %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
|
||||
define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
|
||||
%alloca0 = alloca i32, i32 16, align 4
|
||||
%alloca1 = alloca i32, i32 16, align 4
|
||||
%ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
|
||||
%ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
|
||||
%select = select i1 undef, i32* %ptr0, i32* %ptr1
|
||||
store i32 0, i32* %select, align 4
|
||||
%alloca0 = alloca i32, i32 16, align 4, addrspace(5)
|
||||
%alloca1 = alloca i32, i32 16, align 4, addrspace(5)
|
||||
%ptr0 = getelementptr inbounds i32, i32 addrspace(5)* %alloca0, i32 %a
|
||||
%ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %alloca1, i32 %b
|
||||
%select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
|
||||
store i32 0, i32 addrspace(5)* %select, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@@ -51,11 +53,11 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b)
|
|||
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
|
||||
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
|
||||
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
|
||||
%alloca = alloca [16 x i32], align 4
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
|
||||
%select = select i1 undef, i32* %ptr0, i32* %ptr1
|
||||
store i32 0, i32* %select, align 4
|
||||
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 3
|
||||
%select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
|
||||
store i32 0, i32 addrspace(5)* %select, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@@ -68,34 +70,34 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointe
|
|||
; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
|
||||
; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
|
||||
define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
|
||||
%alloca = alloca [16 x i32], align 4
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
|
||||
%ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
|
||||
%select0 = select i1 undef, i32* %ptr0, i32* %ptr1
|
||||
%select1 = select i1 undef, i32* %select0, i32* %ptr2
|
||||
store i32 0, i32* %select1, align 4
|
||||
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
|
||||
%ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
|
||||
%select0 = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
|
||||
%select1 = select i1 undef, i32 addrspace(5)* %select0, i32 addrspace(5)* %ptr2
|
||||
store i32 0, i32 addrspace(5)* %select1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
|
||||
entry:
|
||||
%alloca = alloca [16 x i32], align 4
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
|
||||
store i32 0, i32* %ptr0
|
||||
%alloca = alloca [16 x i32], align 4, addrspace(5)
|
||||
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
|
||||
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
|
||||
store i32 0, i32 addrspace(5)* %ptr0
|
||||
br i1 undef, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
%ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
|
||||
%select0 = select i1 undef, i32* undef, i32* %ptr2
|
||||
store i32 0, i32* %ptr1
|
||||
%ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
|
||||
%select0 = select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %ptr2
|
||||
store i32 0, i32 addrspace(5)* %ptr1
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
%phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
|
||||
%select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
|
||||
store i32 0, i32* %select1, align 4
|
||||
%phi.ptr = phi i32 addrspace(5)* [ %ptr0, %entry ], [ %select0, %bb1 ]
|
||||
%select1 = select i1 undef, i32 addrspace(5)* %phi.ptr, i32 addrspace(5)* %ptr1
|
||||
store i32 0, i32 addrspace(5)* %select1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@@ -104,12 +106,12 @@ bb2:
|
|||
; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
|
||||
define amdgpu_kernel void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
|
||||
bb:
|
||||
%tmp = alloca double, align 8
|
||||
store double 0.000000e+00, double* %tmp, align 8
|
||||
%tmp = alloca double, align 8, addrspace(5)
|
||||
store double 0.000000e+00, double addrspace(5)* %tmp, align 8
|
||||
%tmp2 = icmp eq i32 %arg1, 0
|
||||
%tmp3 = select i1 %tmp2, double* %tmp, double* null
|
||||
store double 1.000000e+00, double* %tmp3, align 8
|
||||
%tmp4 = load double, double* %tmp, align 8
|
||||
%tmp3 = select i1 %tmp2, double addrspace(5)* %tmp, double addrspace(5)* null
|
||||
store double 1.000000e+00, double addrspace(5)* %tmp3, align 8
|
||||
%tmp4 = load double, double addrspace(5)* %tmp, align 8
|
||||
store double %tmp4, double addrspace(1)* %arg
|
||||
ret void
|
||||
}
|
||||
|
@@ -119,12 +121,12 @@ bb:
|
|||
; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
|
||||
define amdgpu_kernel void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
|
||||
bb:
|
||||
%tmp = alloca double, align 8
|
||||
store double 0.000000e+00, double* %tmp, align 8
|
||||
%tmp = alloca double, align 8, addrspace(5)
|
||||
store double 0.000000e+00, double addrspace(5)* %tmp, align 8
|
||||
%tmp2 = icmp eq i32 %arg1, 0
|
||||
%tmp3 = select i1 %tmp2, double* null, double* %tmp
|
||||
store double 1.000000e+00, double* %tmp3, align 8
|
||||
%tmp4 = load double, double* %tmp, align 8
|
||||
%tmp3 = select i1 %tmp2, double addrspace(5)* null, double addrspace(5)* %tmp
|
||||
store double 1.000000e+00, double addrspace(5)* %tmp3, align 8
|
||||
%tmp4 = load double, double addrspace(5)* %tmp, align 8
|
||||
store double %tmp4, double addrspace(1)* %arg
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue