[AMDGPU] Add some missing check prefixes and tweak test

The test needed some extra ALU instructions to prevent it from being
memory bound.
This commit is contained in:
Jay Foad 2020-07-17 12:57:23 +01:00
parent 2dc3d1b313
commit f05bce86af
1 changed file with 8 additions and 5 deletions

View File

@@ -30,22 +30,25 @@ bb:
} }
; Kernel test_large_stride loads three i32 values from %arg at element
; offsets 4096, 8192 and 12288 and stores to element offsets 1, 2 and 3.
; The check lines expect MemoryBound to be 0 and WaveLimiterHint to be 1.
; NOTE(review): each line below appears to hold the pre-change text followed
; by the post-change text of a side-by-side diff; lines that occur only once
; (the mul instructions) look like additions. Confirm against the real test.
; In the post-change text the expectation lines carry the GCN check prefix and
; each loaded value is multiplied by itself before being stored.
; GCN-LABEL: {{^}}test_large_stride: ; GCN-LABEL: {{^}}test_large_stride:
; MemoryBound: 0 ; GCN: MemoryBound: 0
; WaveLimiterHint : 1 ; GCN: WaveLimiterHint : 1
define amdgpu_kernel void @test_large_stride(i32 addrspace(1)* nocapture %arg) { define amdgpu_kernel void @test_large_stride(i32 addrspace(1)* nocapture %arg) {
bb: bb:
%tmp = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4096 %tmp = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4096
%tmp1 = load i32, i32 addrspace(1)* %tmp, align 4 %tmp1 = load i32, i32 addrspace(1)* %tmp, align 4
; Square the loaded value so there is ALU work between the load and the store;
; the commit description says this keeps the test from being memory bound.
; The same load / mul / store pattern repeats for the other two offsets.
%mul1 = mul i32 %tmp1, %tmp1
%tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
store i32 %tmp1, i32 addrspace(1)* %tmp2, align 4 store i32 %mul1, i32 addrspace(1)* %tmp2, align 4
%tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8192 %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8192
%tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4 %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
%mul4 = mul i32 %tmp4, %tmp4
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
store i32 %tmp4, i32 addrspace(1)* %tmp5, align 4 store i32 %mul4, i32 addrspace(1)* %tmp5, align 4
%tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12288 %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12288
%tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
%mul7 = mul i32 %tmp7, %tmp7
%tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3 %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
store i32 %tmp7, i32 addrspace(1)* %tmp8, align 4 store i32 %mul7, i32 addrspace(1)* %tmp8, align 4
ret void ret void
} }