forked from OSchip/llvm-project
[AMDGPU] Add some missing check prefixes and tweak test
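The "MemoryBound" and "WaveLimiterHint" lines had no check prefix, so they were not actually being checked. With the prefix added, the test needed some extra ALU instructions to prevent it from being memory bound.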
This commit is contained in:
parent
2dc3d1b313
commit
f05bce86af
|
@ -30,22 +30,25 @@ bb:
|
||||||
}
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}test_large_stride:
|
; GCN-LABEL: {{^}}test_large_stride:
|
||||||
; MemoryBound: 0
|
; GCN: MemoryBound: 0
|
||||||
; WaveLimiterHint : 1
|
; GCN: WaveLimiterHint : 1
|
||||||
define amdgpu_kernel void @test_large_stride(i32 addrspace(1)* nocapture %arg) {
|
define amdgpu_kernel void @test_large_stride(i32 addrspace(1)* nocapture %arg) {
|
||||||
bb:
|
bb:
|
||||||
%tmp = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4096
|
%tmp = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4096
|
||||||
%tmp1 = load i32, i32 addrspace(1)* %tmp, align 4
|
%tmp1 = load i32, i32 addrspace(1)* %tmp, align 4
|
||||||
|
%mul1 = mul i32 %tmp1, %tmp1
|
||||||
%tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
|
%tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
|
||||||
store i32 %tmp1, i32 addrspace(1)* %tmp2, align 4
|
store i32 %mul1, i32 addrspace(1)* %tmp2, align 4
|
||||||
%tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8192
|
%tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8192
|
||||||
%tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
|
%tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
|
||||||
|
%mul4 = mul i32 %tmp4, %tmp4
|
||||||
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
|
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
|
||||||
store i32 %tmp4, i32 addrspace(1)* %tmp5, align 4
|
store i32 %mul4, i32 addrspace(1)* %tmp5, align 4
|
||||||
%tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12288
|
%tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12288
|
||||||
%tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
|
%tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
|
||||||
|
%mul7 = mul i32 %tmp7, %tmp7
|
||||||
%tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
|
%tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
|
||||||
store i32 %tmp7, i32 addrspace(1)* %tmp8, align 4
|
store i32 %mul7, i32 addrspace(1)* %tmp8, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue