2015-12-16 06:39:36 +08:00
|
|
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s
|
|
|
|
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s
|
2015-12-10 10:13:01 +08:00
|
|
|
|
2018-02-14 02:00:25 +08:00
|
|
|
; First private constant table of four floats in addrspace(4); indexed by
; @private_test.
@private1 = private unnamed_addr addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
|
|
|
|
; Second private constant table of four floats in addrspace(4); also indexed
; by @private_test.
@private2 = private unnamed_addr addrspace(4) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
|
|
|
|
; Zero-initialized 256 x i32 global in addrspace(4) with available_externally
; linkage; read by @available_externally_test.
@available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer
|
2015-12-10 10:13:01 +08:00
|
|
|
|
2016-10-21 02:12:38 +08:00
|
|
|
; GCN-LABEL: {{^}}private_test:
|
2015-12-10 10:13:01 +08:00
|
|
|
; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
|
2016-10-21 02:12:38 +08:00
|
|
|
|
|
|
|
; Non-HSA OSes use fixup into .text section.
|
|
|
|
; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1
|
|
|
|
; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0
|
|
|
|
|
|
|
|
; HSA OSes use relocations.
|
|
|
|
; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4
|
|
|
|
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+4
|
|
|
|
|
2015-12-10 10:13:01 +08:00
|
|
|
; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
|
2016-10-21 02:12:38 +08:00
|
|
|
|
|
|
|
; Non-HSA OSes use fixup into .text section.
|
|
|
|
; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2
|
|
|
|
; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0
|
|
|
|
|
|
|
|
; HSA OSes use relocations.
|
|
|
|
; HSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4
|
|
|
|
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4
|
|
|
|
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel under test: loads element %index from @private1 and then from
; @private2 (both in addrspace(4)) and writes each loaded float to %out.
; Both stores target the same address and are marked volatile, presumably so
; that the first one is not elided as an overwritten store.
define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
|
2018-02-14 02:00:25 +08:00
|
|
|
%ptr = getelementptr [4 x float], [4 x float] addrspace(4) * @private1, i32 0, i32 %index
|
|
|
|
%val = load float, float addrspace(4)* %ptr
|
Elide stores which are overwritten without being observed.
Summary:
In SelectionDAG, when a store is immediately chained to another store
to the same address, elide the first store as it has no observable
effects. This causes small improvements dealing with intrinsics
lowered to stores.
Test notes:
* Many testcases overwrite store addresses multiple times and needed
minor changes, mainly making stores volatile to prevent the
optimization from optimizing the test away.
* Many X86 test cases optimized out instructions associated with
va_start.
* Note that test_splat in CodeGen/AArch64/misched-stp.ll no longer has
dependencies to check and can probably be removed and potentially
replaced with another test.
Reviewers: rnk, john.brawn
Subscribers: aemerson, rengolin, qcolombet, jyknight, nemanjai, nhaehnle, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D33206
llvm-svn: 303198
2017-05-17 03:43:56 +08:00
|
|
|
store volatile float %val, float addrspace(1)* %out
|
2018-02-14 02:00:25 +08:00
|
|
|
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(4) * @private2, i32 0, i32 %index
|
|
|
|
%val2 = load float, float addrspace(4)* %ptr2
|
Elide stores which are overwritten without being observed.
Summary:
In SelectionDAG, when a store is immediately chained to another store
to the same address, elide the first store as it has no observable
effects. This causes small improvements dealing with intrinsics
lowered to stores.
Test notes:
* Many testcases overwrite store addresses multiple times and needed
minor changes, mainly making stores volatile to prevent the
optimization from optimizing the test away.
* Many X86 test cases optimized out instructions associated with
va_start.
* Note that test_splat in CodeGen/AArch64/misched-stp.ll no longer has
dependencies to check and can probably be removed and potentially
replaced with another test.
Reviewers: rnk, john.brawn
Subscribers: aemerson, rengolin, qcolombet, jyknight, nemanjai, nhaehnle, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D33206
llvm-svn: 303198
2017-05-17 03:43:56 +08:00
|
|
|
store volatile float %val2, float addrspace(1)* %out
|
2015-12-10 10:13:01 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2016-10-21 02:12:38 +08:00
|
|
|
; HSA-LABEL: {{^}}available_externally_test:
|
|
|
|
; HSA: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
|
|
|
|
; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4
|
|
|
|
; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4
|
2017-03-22 05:39:51 +08:00
|
|
|
; Kernel under test: loads element 1 of @available_externally (addrspace(4))
; and stores it to %out. The checks preceding this function use only the HSA
; prefix and expect the address to be formed from s_getpc_b64 plus
; @gotpcrel32 lo/hi operands.
define amdgpu_kernel void @available_externally_test(i32 addrspace(1)* %out) {
|
2018-02-14 02:00:25 +08:00
|
|
|
; Constant indices: array base 0, element 1.
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(4)* @available_externally, i32 0, i32 1
|
|
|
|
%val = load i32, i32 addrspace(4)* %ptr
|
2016-10-21 02:12:38 +08:00
|
|
|
store i32 %val, i32 addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; NOHSA: .text
|
|
|
|
; HSA: .section .rodata
|
|
|
|
|
|
|
|
; GCN: private1:
|
|
|
|
; GCN: private2:
|