forked from OSchip/llvm-project
AMDGPU: Remove some old intrinsic uses from tests
llvm-svn: 260493
This commit is contained in:
parent
4244be25bd
commit
9c47dd583a
|
@ -1,13 +1,13 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_i64_vreg:
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
%a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
|
||||
%b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
|
||||
%a = load i64, i64 addrspace(1)* %a_ptr
|
||||
|
@ -59,7 +59,7 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
|
|||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
%a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
|
||||
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
|
||||
%a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
|
||||
|
|
|
@ -47,19 +47,19 @@ end:
|
|||
; CHECK: flat_load_dword
|
||||
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
|
||||
%alloca = alloca i32, i32 9, align 4
|
||||
%x = call i32 @llvm.r600.read.tidig.x() #3
|
||||
%x = call i32 @llvm.amdgcn.workitem.id.x() #3
|
||||
%pptr = getelementptr i32, i32* %alloca, i32 %x
|
||||
%fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
|
||||
store i32 %x, i32 addrspace(4)* %fptr
|
||||
; Dummy call
|
||||
call void @llvm.AMDGPU.barrier.local() #1
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
%reload = load i32, i32 addrspace(4)* %fptr, align 4
|
||||
store i32 %reload, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #3
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #3
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.SI.tid() nounwind readnone
|
||||
declare void @llvm.AMDGPU.barrier.local() nounwind convergent
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
|
||||
declare void @llvm.amdgcn.s.barrier() #2
|
||||
|
||||
; The required pointer calculations for the alloca'd actually requires
|
||||
; an add and won't be folded into the addressing, which fails with a
|
||||
|
@ -24,9 +25,10 @@ declare void @llvm.AMDGPU.barrier.local() nounwind convergent
|
|||
|
||||
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
|
||||
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
|
||||
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
|
||||
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
|
||||
%alloca = alloca [4 x i32], i32 4, align 16
|
||||
%tid = call i32 @llvm.SI.tid() readnone
|
||||
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
|
||||
%a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
|
||||
%b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid
|
||||
%a = load i32, i32 addrspace(1)* %a_ptr
|
||||
|
@ -35,10 +37,13 @@ define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 add
|
|||
%alloca_ptr = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
|
||||
store i32 %result, i32* %alloca_ptr, align 4
|
||||
; Dummy call
|
||||
call void @llvm.AMDGPU.barrier.local() nounwind convergent
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
%reload = load i32, i32* %alloca_ptr, align 4
|
||||
%out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
|
||||
store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind convergent }
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
|
||||
; OPT-LABEL: @test_sink_global_small_offset_i32(
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
|
||||
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
; --------------------------------------------------------------------------------
|
||||
; i32 compares
|
||||
|
@ -9,7 +9,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
|
|||
; GCN-LABEL: {{^}}commute_eq_64_i32:
|
||||
; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -22,7 +22,7 @@ define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
|
|||
; GCN-LABEL: {{^}}commute_ne_64_i32:
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -37,7 +37,7 @@ define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
|
|||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}}
|
||||
define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -50,7 +50,7 @@ define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
|
|||
; GCN-LABEL: {{^}}commute_ugt_64_i32:
|
||||
; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -63,7 +63,7 @@ define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_uge_64_i32:
|
||||
; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
|
||||
define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -76,7 +76,7 @@ define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_ult_64_i32:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -89,7 +89,7 @@ define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_ule_63_i32:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
|
||||
define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -105,7 +105,7 @@ define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
|
|||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
|
||||
define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -118,7 +118,7 @@ define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
|
||||
; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
|
||||
define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -131,7 +131,7 @@ define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
|
|||
; GCN-LABEL: {{^}}commute_sge_neg2_i32:
|
||||
; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
|
||||
define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -144,7 +144,7 @@ define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
|
|||
; GCN-LABEL: {{^}}commute_slt_neg16_i32:
|
||||
; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
|
||||
define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -157,7 +157,7 @@ define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_sle_5_i32:
|
||||
; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
|
||||
define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -174,7 +174,7 @@ define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1
|
|||
; GCN-LABEL: {{^}}commute_eq_64_i64:
|
||||
; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -187,7 +187,7 @@ define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
|
|||
; GCN-LABEL: {{^}}commute_ne_64_i64:
|
||||
; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -200,7 +200,7 @@ define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
|
|||
; GCN-LABEL: {{^}}commute_ugt_64_i64:
|
||||
; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -213,7 +213,7 @@ define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_uge_64_i64:
|
||||
; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -226,7 +226,7 @@ define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_ult_64_i64:
|
||||
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -239,7 +239,7 @@ define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_ule_63_i64:
|
||||
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -255,7 +255,7 @@ define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
|
|||
; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
|
||||
; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -268,7 +268,7 @@ define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #
|
|||
; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
|
||||
; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -281,7 +281,7 @@ define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in)
|
|||
; GCN-LABEL: {{^}}commute_sge_neg2_i64:
|
||||
; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -294,7 +294,7 @@ define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in)
|
|||
; GCN-LABEL: {{^}}commute_slt_neg16_i64:
|
||||
; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -307,7 +307,7 @@ define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_sle_5_i64:
|
||||
; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -325,7 +325,7 @@ define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1
|
|||
; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
|
||||
; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -339,7 +339,7 @@ define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
|
||||
; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -352,7 +352,7 @@ define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_oge_2.0_f32:
|
||||
; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -365,7 +365,7 @@ define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_olt_2.0_f32:
|
||||
; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -378,7 +378,7 @@ define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ole_2.0_f32:
|
||||
; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -391,7 +391,7 @@ define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_one_2.0_f32:
|
||||
; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -404,7 +404,7 @@ define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ord_2.0_f32:
|
||||
; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
|
||||
define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -417,7 +417,7 @@ define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
|
||||
; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -430,7 +430,7 @@ define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
|
||||
; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -443,7 +443,7 @@ define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_uge_2.0_f32:
|
||||
; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -456,7 +456,7 @@ define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ult_2.0_f32:
|
||||
; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -469,7 +469,7 @@ define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_ule_2.0_f32:
|
||||
; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -482,7 +482,7 @@ define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_une_2.0_f32:
|
||||
; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
|
||||
define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -495,7 +495,7 @@ define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_uno_2.0_f32:
|
||||
; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
|
||||
define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load float, float addrspace(1)* %gep.in
|
||||
|
@ -513,7 +513,7 @@ define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in
|
|||
; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
|
||||
; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -527,7 +527,7 @@ define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
|
||||
; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -540,7 +540,7 @@ define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_oge_2.0_f64:
|
||||
; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -553,7 +553,7 @@ define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_olt_2.0_f64:
|
||||
; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -566,7 +566,7 @@ define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ole_2.0_f64:
|
||||
; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -579,7 +579,7 @@ define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_one_2.0_f64:
|
||||
; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -592,7 +592,7 @@ define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ord_2.0_f64:
|
||||
; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
|
||||
define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -605,7 +605,7 @@ define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
|
||||
; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -618,7 +618,7 @@ define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
|
||||
; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -631,7 +631,7 @@ define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_uge_2.0_f64:
|
||||
; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -644,7 +644,7 @@ define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ult_2.0_f64:
|
||||
; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -657,7 +657,7 @@ define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_ule_2.0_f64:
|
||||
; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -670,7 +670,7 @@ define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_une_2.0_f64:
|
||||
; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
@ -683,7 +683,7 @@ define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
|||
; GCN-LABEL: {{^}}commute_uno_2.0_f64:
|
||||
; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
|
||||
define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep.in
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||
|
||||
|
@ -9,7 +9,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
|||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||
|
@ -23,7 +23,7 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
|
|||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||
|
@ -38,7 +38,7 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
|
|||
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
%x.fneg = fsub float -0.000000e+00, %x
|
||||
|
@ -54,7 +54,7 @@ define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(
|
|||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
%x.fabs = call float @llvm.fabs.f32(float %x) #1
|
||||
|
@ -69,7 +69,7 @@ define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(
|
|||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
|
@ -86,7 +86,7 @@ define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)*
|
|||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
|
@ -103,7 +103,7 @@ define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)*
|
|||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
|
@ -122,7 +122,7 @@ define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace
|
|||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
|
@ -140,7 +140,7 @@ define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrs
|
|||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%x = load float, float addrspace(1)* %gep.0
|
||||
|
@ -162,7 +162,7 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float
|
|||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
; GCN: s_endpgm
|
||||
define void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 {
|
||||
entry:
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx = shl i32 %tid, 2
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx
|
||||
|
@ -42,10 +42,7 @@ exit: ; preds = %for.body, %entry
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare void @llvm.AMDGPU.barrier.local() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
|
||||
|
@ -25,7 +25,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
|
|||
; CHECK: s_endpgm
|
||||
define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
|
||||
entry:
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%mul = shl nsw i32 %x.i, 1
|
||||
br label %for.body
|
||||
|
||||
|
@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry
|
|||
%sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
|
||||
%offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
|
||||
%k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
tail call void @llvm.AMDGPU.barrier.local() #1
|
||||
tail call void @llvm.amdgcn.s.barrier() #1
|
||||
%arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
|
||||
%tmp = load float, float addrspace(3)* %arrayidx, align 4
|
||||
%add1 = add nsw i32 %offset.02, 1
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
@lds.obj = addrspace(3) global [256 x i32] undef, align 4
|
||||
|
||||
|
@ -12,7 +11,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
|
|||
; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
|
||||
define void @write_ds_sub0_offset0_global() #0 {
|
||||
entry:
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%sub1 = sub i32 0, %x.i
|
||||
%tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
|
||||
|
@ -26,7 +25,7 @@ entry:
|
|||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
|
||||
; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
|
||||
define void @add_x_shl_neg_to_sub_max_offset() #1 {
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%neg = sub i32 0, %x.i
|
||||
%shl = shl i32 %neg, 2
|
||||
%add = add i32 65535, %shl
|
||||
|
@ -41,7 +40,7 @@ define void @add_x_shl_neg_to_sub_max_offset() #1 {
|
|||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
|
||||
; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
|
||||
define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%neg = sub i32 0, %x.i
|
||||
%shl = shl i32 %neg, 2
|
||||
%add = add i32 65536, %shl
|
||||
|
@ -60,7 +59,7 @@ define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
|
|||
; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @add_x_shl_neg_to_sub_multi_use() #1 {
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%neg = sub i32 0, %x.i
|
||||
%shl = shl i32 %neg, 2
|
||||
%add0 = add i32 123, %shl
|
||||
|
@ -82,7 +81,7 @@ define void @add_x_shl_neg_to_sub_multi_use() #1 {
|
|||
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%neg = sub i32 0, %x.i
|
||||
%shl = shl i32 %neg, 2
|
||||
%add = add i32 123, %shl
|
||||
|
@ -97,7 +96,7 @@ define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
|
|||
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
|
||||
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
|
||||
define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%neg = sub i32 0, %x.i
|
||||
%shl = shl i32 %neg, 2
|
||||
%add = add i32 1019, %shl
|
||||
|
@ -111,7 +110,7 @@ define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
|
|||
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
|
||||
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
|
||||
define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
|
||||
%x.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%neg = sub i32 0, %x.i
|
||||
%shl = shl i32 %neg, 2
|
||||
%add = add i32 1020, %shl
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -32,7 +32,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 255
|
||||
|
@ -50,7 +50,7 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 257
|
||||
|
@ -67,7 +67,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 0
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -99,7 +99,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 0
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -109,7 +109,7 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
|
|||
%val1 = load float, float addrspace(3)* %arrayidx1, align 4
|
||||
%sum.0 = fadd float %val0, %val1
|
||||
|
||||
call void @llvm.AMDGPU.barrier.local() #2
|
||||
call void @llvm.amdgcn.s.barrier() #2
|
||||
|
||||
%idx.2 = add nsw i32 %tid.x, 11
|
||||
%arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
|
||||
|
@ -134,7 +134,7 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -171,7 +171,7 @@ define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read_b32
|
||||
; SI: s_endpgm
|
||||
define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
|
||||
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
|
||||
%gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
|
||||
|
@ -197,7 +197,7 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad
|
|||
; SI: ds_read_b32
|
||||
; SI: s_endpgm
|
||||
define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
|
||||
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
|
||||
%gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
|
||||
|
@ -220,7 +220,7 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f
|
|||
; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
|
||||
%ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1
|
||||
%x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
|
||||
|
@ -244,7 +244,7 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -262,7 +262,7 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -281,7 +281,7 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
|
|||
; SI-NOT: ds_read2_b32
|
||||
; SI: s_endpgm
|
||||
define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 1
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -297,7 +297,7 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %
|
|||
; SI-NOT: ds_read2_b32
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 2
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -316,7 +316,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
|
|||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f64(double addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -332,7 +332,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
|
|||
; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
%add.x = add nsw i32 %x.i, 255
|
||||
|
@ -350,7 +350,7 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
|
|||
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
%add.x = add nsw i32 %x.i, 257
|
||||
|
@ -368,7 +368,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
|
|||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 7
|
||||
|
@ -438,8 +438,8 @@ define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
|
|||
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
|
||||
|
||||
define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
|
||||
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
%y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
%arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
|
||||
%tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
|
||||
%add47 = add nsw i32 %x.i, 1
|
||||
|
@ -494,19 +494,19 @@ define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in)
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.y() #1
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare void @llvm.amdgcn.s.barrier() #2
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
; CI: buffer_store_dwordx2 [[RESULT]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v2f32_superreg_align4(<2 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <2 x float>], [512 x <2 x float>] addrspace(3)* @lds.v2, i32 0, i32 %x.i
|
||||
%val0 = load <2 x float>, <2 x float> addrspace(3)* %arrayidx0, align 4
|
||||
%out.gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %x.i
|
||||
|
@ -27,7 +27,7 @@ define void @simple_read2_v2f32_superreg_align4(<2 x float> addrspace(1)* %out)
|
|||
; CI: buffer_store_dwordx2 [[RESULT]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <2 x float>], [512 x <2 x float>] addrspace(3)* @lds.v2, i32 0, i32 %x.i
|
||||
%val0 = load <2 x float>, <2 x float> addrspace(3)* %arrayidx0
|
||||
%out.gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %x.i
|
||||
|
@ -44,7 +44,7 @@ define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 {
|
|||
; CI: buffer_store_dword v[[ADD2]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v4f32_superreg_align4(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
|
||||
%val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0, align 4
|
||||
%elt0 = extractelement <4 x float> %val0, i32 0
|
||||
|
@ -69,7 +69,7 @@ define void @simple_read2_v4f32_superreg_align4(float addrspace(1)* %out) #0 {
|
|||
; CI: buffer_store_dword v[[ADD1]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <3 x float>], [512 x <3 x float>] addrspace(3)* @lds.v3, i32 0, i32 %x.i
|
||||
%val0 = load <3 x float>, <3 x float> addrspace(3)* %arrayidx0, align 4
|
||||
%elt0 = extractelement <3 x float> %val0, i32 0
|
||||
|
@ -95,7 +95,7 @@ define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
|
|||
; CI: buffer_store_dwordx4
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
|
||||
%val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0, align 8
|
||||
%out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i32 %x.i
|
||||
|
@ -110,7 +110,7 @@ define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out)
|
|||
; CI: buffer_store_dwordx4
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
|
||||
%val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0
|
||||
%out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i32 %x.i
|
||||
|
@ -130,7 +130,7 @@ define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
|
|||
; CI: buffer_store_dwordx4
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <8 x float>], [512 x <8 x float>] addrspace(3)* @lds.v8, i32 0, i32 %x.i
|
||||
%val0 = load <8 x float>, <8 x float> addrspace(3)* %arrayidx0
|
||||
%out.gep = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %out, i32 %x.i
|
||||
|
@ -158,7 +158,7 @@ define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
|
|||
; CI: buffer_store_dwordx4
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x <16 x float>], [512 x <16 x float>] addrspace(3)* @lds.v16, i32 0, i32 %x.i
|
||||
%val0 = load <16 x float>, <16 x float> addrspace(3)* %arrayidx0
|
||||
%out.gep = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %out, i32 %x.i
|
||||
|
@ -173,7 +173,7 @@ define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 {
|
|||
; CI: buffer_store_dwordx2 v{{\[}}[[REG_ELT0]]:[[REG_ELT1]]{{\]}}
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%arrayidx1 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 1
|
||||
|
||||
|
@ -196,7 +196,7 @@ define void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspa
|
|||
; CI: buffer_store_dwordx4 v{{\[}}[[REG_ELT0]]:[[REG_ELT3]]{{\]}}
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%arrayidx1 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 1
|
||||
%arrayidx2 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 2
|
||||
|
@ -218,19 +218,10 @@ define void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspa
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.y() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 64
|
||||
|
@ -30,7 +30,7 @@ define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -50,7 +50,7 @@ define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -70,7 +70,7 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
|
|||
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -87,7 +87,7 @@ define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, floa
|
|||
; SI-NOT: ds_read2st64_b32
|
||||
; SI: s_endpgm
|
||||
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 63
|
||||
|
@ -103,7 +103,7 @@ define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
|
|||
; SI-NOT: ds_read2st64_b32
|
||||
; SI: s_endpgm
|
||||
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -123,7 +123,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
|
|||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
%add.x = add nsw i32 %x.i, 64
|
||||
|
@ -142,7 +142,7 @@ define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
|
|||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
|
@ -162,7 +162,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
|
|||
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 4
|
||||
%add.x = add nsw i32 %x.i, 64
|
||||
|
@ -182,7 +182,7 @@ define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspac
|
|||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 256
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
|
@ -202,7 +202,7 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
|
|||
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
|
@ -219,7 +219,7 @@ define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, dou
|
|||
; SI-NOT: ds_read2st64_b64
|
||||
; SI: s_endpgm
|
||||
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%add.x.0 = add nsw i32 %x.i, 64
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
|
@ -240,7 +240,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
|
|||
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
|
||||
%val0 = load double, double addrspace(3)* %arrayidx0, align 8
|
||||
%add.x = add nsw i32 %x.i, 8
|
||||
|
@ -253,16 +253,10 @@ define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, do
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.y() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
|
||||
%val = load float, float addrspace(1)* %in.gep, align 4
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
|
@ -28,7 +28,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
|
|||
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load float, float addrspace(1)* %in.gep.0, align 4
|
||||
|
@ -47,7 +47,7 @@ define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1
|
|||
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
|
||||
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
|
||||
%val0 = load float, float addrspace(1)* %in0.gep, align 4
|
||||
|
@ -66,7 +66,7 @@ define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float
|
|||
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
|
||||
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
|
||||
%val0 = load float, float addrspace(1)* %in0.gep, align 4
|
||||
|
@ -87,7 +87,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
|
|||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
|
||||
|
@ -108,7 +108,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
|
|||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
|
||||
%val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
|
||||
%val0 = extractelement <2 x float> %val, i32 0
|
||||
|
@ -127,7 +127,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
|
|||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
|
||||
%val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
|
||||
%val0 = extractelement <4 x float> %val, i32 0
|
||||
|
@ -147,7 +147,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
|
|||
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load float, float addrspace(1)* %in.gep.0, align 4
|
||||
|
@ -165,7 +165,7 @@ define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float
|
|||
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
|
||||
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
|
||||
%val0 = load float, float addrspace(1)* %in0.gep, align 4
|
||||
|
@ -183,7 +183,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
|
|||
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
|
||||
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
|
||||
%val0 = load float, float addrspace(1)* %in0.gep, align 4
|
||||
|
@ -213,7 +213,7 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
|
|||
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
|
||||
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
|
||||
%val0 = load float, float addrspace(1)* %in0.gep, align 4
|
||||
|
@ -244,7 +244,7 @@ define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, f
|
|||
; SI: ds_write_b32
|
||||
; SI: s_endpgm
|
||||
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
|
||||
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
|
||||
%val0 = load float, float addrspace(1)* %in0.gep, align 4
|
||||
|
@ -271,7 +271,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
|
|||
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
|
||||
%val = load double, double addrspace(1)* %in.gep, align 8
|
||||
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
|
||||
|
@ -289,7 +289,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
|
|||
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
|
||||
; SI: s_endpgm
|
||||
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
|
||||
%val = load double, double addrspace(1)* %in.gep, align 8
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
|
||||
|
@ -307,7 +307,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
|
|||
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load double, double addrspace(1)* %in.gep.0, align 8
|
||||
|
@ -372,8 +372,8 @@ define void @store_misaligned64_constant_large_offsets() {
|
|||
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
|
||||
|
||||
define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
|
||||
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
%y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
%val = load float, float addrspace(1)* %in
|
||||
%arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
|
||||
store float %val, float addrspace(3)* %arrayidx44, align 4
|
||||
|
@ -411,7 +411,7 @@ define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, f
|
|||
; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}}
|
||||
; CI: s_endpgm
|
||||
define void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in
|
||||
%val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4
|
||||
%out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i
|
||||
|
@ -420,19 +420,16 @@ define void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out,
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.y() #1
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
|
||||
%val = load float, float addrspace(1)* %in.gep, align 4
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
|
||||
|
@ -26,7 +26,7 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add
|
|||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load float, float addrspace(1)* %in.gep.0, align 4
|
||||
|
@ -47,7 +47,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
|
|||
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load float, float addrspace(1)* %in.gep.0, align 4
|
||||
|
@ -67,7 +67,7 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl
|
|||
; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
|
||||
%in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
|
||||
%val0 = load double, double addrspace(1)* %in.gep.0, align 8
|
||||
|
@ -86,7 +86,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
|
|||
; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
|
||||
; SI: s_endpgm
|
||||
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
|
||||
%val = load double, double addrspace(1)* %in.gep, align 8
|
||||
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
|
||||
|
@ -98,19 +98,10 @@ define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C,
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.y() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
declare double @fabs(double) readnone
|
||||
declare double @llvm.fabs.f64(double) readnone
|
||||
|
@ -11,7 +11,7 @@ declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
|
|||
; SI: v_and_b32
|
||||
; SI: s_endpgm
|
||||
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%tidext = sext i32 %tid to i64
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
|
||||
%val = load double, double addrspace(1)* %gep, align 8
|
||||
|
|
|
@ -127,9 +127,6 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
|
|||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #3
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
||||
attributes #3 = { nounwind readnone }
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare double @llvm.fabs.f64(double) #0
|
||||
declare double @llvm.fma.f64(double, double, double) #0
|
||||
declare float @llvm.fma.f32(float, float, float) #0
|
||||
|
@ -14,7 +14,7 @@ declare float @llvm.fma.f32(float, float, float) #0
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -42,7 +42,7 @@ define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addr
|
|||
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -71,7 +71,7 @@ define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -95,7 +95,7 @@ define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addr
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -123,7 +123,7 @@ define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double
|
|||
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -152,7 +152,7 @@ define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, d
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -180,7 +180,7 @@ define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double
|
|||
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -209,7 +209,7 @@ define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, d
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -238,7 +238,7 @@ define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double
|
|||
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -272,7 +272,7 @@ define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %o
|
|||
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -307,7 +307,7 @@ define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %o
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
@ -342,7 +342,7 @@ define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %
|
|||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; Make sure we don't try to form FMAX_LEGACY nodes with f64
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; FUNC-LABEL: @test_fmax_legacy_uge_f64
|
||||
define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -20,7 +20,7 @@ define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspac
|
|||
|
||||
; FUNC-LABEL: @test_fmax_legacy_oge_f64
|
||||
define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -35,7 +35,7 @@ define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspac
|
|||
|
||||
; FUNC-LABEL: @test_fmax_legacy_ugt_f64
|
||||
define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -50,7 +50,7 @@ define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspac
|
|||
|
||||
; FUNC-LABEL: @test_fmax_legacy_ogt_f64
|
||||
define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_f64
|
||||
define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 {
|
||||
|
@ -15,7 +15,7 @@ define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double>
|
|||
|
||||
; FUNC-LABEL: @test_fmin_legacy_ule_f64
|
||||
define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -30,7 +30,7 @@ define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspac
|
|||
|
||||
; FUNC-LABEL: @test_fmin_legacy_ole_f64
|
||||
define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -45,7 +45,7 @@ define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspac
|
|||
|
||||
; FUNC-LABEL: @test_fmin_legacy_olt_f64
|
||||
define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -60,7 +60,7 @@ define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspac
|
|||
|
||||
; FUNC-LABEL: @test_fmin_legacy_ult_f64
|
||||
define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare float @llvm.fmuladd.f32(float, float, float)
|
||||
declare double @llvm.fmuladd.f64(double, double, double)
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: {{^}}fmuladd_f32:
|
||||
|
@ -37,7 +37,7 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
|||
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; CHECK: buffer_store_dword [[R2]]
|
||||
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -56,7 +56,7 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
|
|||
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; CHECK: buffer_store_dword [[R2]]
|
||||
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -77,7 +77,7 @@ define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %
|
|||
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -99,7 +99,7 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
|
|||
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -119,7 +119,7 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
|
|||
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; CHECK: buffer_store_dword [[R2]]
|
||||
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -139,7 +139,7 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
|
|||
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; CHECK: buffer_store_dword [[R2]]
|
||||
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -161,7 +161,7 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
|
|||
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; CHECK: buffer_store_dword [[R2]]
|
||||
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -183,7 +183,7 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1
|
|||
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
|
||||
; CHECK: buffer_store_dword [[RESULT]]
|
||||
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
|
||||
declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare double @llvm.fabs.f64(double) #1
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @fp_to_sint_f64_i32
|
||||
; SI: v_cvt_i32_f64_e32
|
||||
|
@ -47,7 +47,7 @@ define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %
|
|||
; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
|
||||
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep, align 8
|
||||
%cast = fptosi double %val to i64
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
|
||||
; SI: v_cvt_u32_f64_e32
|
||||
|
@ -47,7 +47,7 @@ define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %
|
|||
; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
|
||||
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%val = load double, double addrspace(1)* %gep, align 8
|
||||
%cast = fptoui double %val to i64
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Tests for indirect addressing on SI, which is implemented using dynamic
|
||||
; indexing of vectors.
|
||||
|
@ -87,7 +87,7 @@ entry:
|
|||
; CHECK: s_cbranch_execnz
|
||||
define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%id = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%index = add i32 %id, -512
|
||||
%value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
|
@ -152,7 +152,7 @@ entry:
|
|||
; CHECK: s_cbranch_execnz
|
||||
define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
%id = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%index = add i32 %id, -512
|
||||
%value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
|
||||
store <4 x i32> %value, <4 x i32> addrspace(1)* %out
|
||||
|
@ -167,12 +167,13 @@ entry:
|
|||
; CHECK: s_cbranch_execnz
|
||||
define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
%id = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%index = add i32 %id, -16
|
||||
%value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
|
||||
store <4 x i32> %value, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -3,8 +3,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
|
||||
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() convergent nounwind
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
; SI-LABEL: {{^}}private_access_f64_alloca:
|
||||
|
||||
|
@ -13,12 +12,12 @@ declare void @llvm.AMDGPU.barrier.local() convergent nounwind
|
|||
|
||||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 {
|
||||
%val = load double, double addrspace(1)* %in, align 8
|
||||
%array = alloca double, i32 16, align 8
|
||||
%ptr = getelementptr inbounds double, double* %array, i32 %b
|
||||
store double %val, double* %ptr, align 8
|
||||
call void @llvm.AMDGPU.barrier.local() convergent nounwind
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
%result = load double, double* %ptr, align 8
|
||||
store double %result, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
|
@ -33,12 +32,12 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
|
|||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
|
||||
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
|
||||
%array = alloca <2 x double>, i32 16, align 16
|
||||
%ptr = getelementptr inbounds <2 x double>, <2 x double>* %array, i32 %b
|
||||
store <2 x double> %val, <2 x double>* %ptr, align 16
|
||||
call void @llvm.AMDGPU.barrier.local() convergent nounwind
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
%result = load <2 x double>, <2 x double>* %ptr, align 16
|
||||
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
|
@ -51,12 +50,12 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
|
|||
|
||||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 {
|
||||
%val = load i64, i64 addrspace(1)* %in, align 8
|
||||
%array = alloca i64, i32 16, align 8
|
||||
%ptr = getelementptr inbounds i64, i64* %array, i32 %b
|
||||
store i64 %val, i64* %ptr, align 8
|
||||
call void @llvm.AMDGPU.barrier.local() convergent nounwind
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
%result = load i64, i64* %ptr, align 8
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
|
@ -71,13 +70,16 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
|
|||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
|
||||
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
|
||||
%array = alloca <2 x i64>, i32 16, align 16
|
||||
%ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i32 %b
|
||||
store <2 x i64> %val, <2 x i64>* %ptr, align 16
|
||||
call void @llvm.AMDGPU.barrier.local() convergent nounwind
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
%result = load <2 x i64>, <2 x i64>* %ptr, align 16
|
||||
store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
declare i1 @llvm.amdgcn.class.f32(float, i32) #1
|
||||
declare i1 @llvm.amdgcn.class.f64(double, i32) #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare double @llvm.fabs.f64(double) #1
|
||||
|
||||
|
@ -133,7 +133,7 @@ define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
@ -151,7 +151,7 @@ define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%b = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -171,7 +171,7 @@ define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%b = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -291,7 +291,7 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load double, double addrspace(1)* %in
|
||||
|
@ -307,7 +307,7 @@ define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace
|
|||
; SI: v_cmp_class_f64_e32 vcc,
|
||||
; SI: s_endpgm
|
||||
define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%b = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -322,7 +322,7 @@ define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %
|
|||
; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
|
||||
; SI: s_endpgm
|
||||
define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%b = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -339,7 +339,7 @@ define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i3
|
|||
; SI-NOT: v_cmp_class
|
||||
; SI: s_endpgm
|
||||
define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
@ -359,7 +359,7 @@ define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)
|
|||
; SI-NOT: v_cmp_class
|
||||
; SI: s_endpgm
|
||||
define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
@ -382,7 +382,7 @@ define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1
|
|||
; SI-NOT: v_cmp_class
|
||||
; SI: s_endpgm
|
||||
define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
@ -417,7 +417,7 @@ define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float ad
|
|||
; SI-NOT: v_cmp_class
|
||||
; SI: s_endpgm
|
||||
define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
@ -437,7 +437,7 @@ define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)
|
|||
; SI-NOT: v_cmp_class
|
||||
; SI: s_endpgm
|
||||
define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
@ -457,7 +457,7 @@ define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)
|
|||
; SI: s_or_b64
|
||||
; SI: s_endpgm
|
||||
define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.in
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
; FIXME: Enable for VI.
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone
|
||||
declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone
|
||||
|
||||
|
@ -115,7 +115,7 @@ define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, fl
|
|||
; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
|
||||
%gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
|
||||
|
@ -152,7 +152,7 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
|
|||
; SI: s_endpgm
|
||||
define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind {
|
||||
entry:
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 2
|
||||
%gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone
|
||||
declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
|
@ -12,7 +12,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -32,7 +32,7 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)*
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -52,7 +52,7 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)*
|
|||
; SI: buffer_store_dwordx2 [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -72,7 +72,7 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)
|
|||
; SI: buffer_store_dwordx2 [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -92,7 +92,7 @@ define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
|
||||
%b = load float, float addrspace(1)* %gep, align 4
|
||||
|
@ -110,7 +110,7 @@ define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float add
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
|
||||
%b = load float, float addrspace(1)* %gep, align 4
|
||||
|
@ -128,7 +128,7 @@ define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float add
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
|
||||
%a = load float, float addrspace(1)* %gep, align 4
|
||||
|
@ -146,7 +146,7 @@ define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float add
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
|
||||
%a = load float, float addrspace(1)* %gep, align 4
|
||||
|
@ -164,7 +164,7 @@ define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float add
|
|||
; SI: buffer_store_dwordx2 [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
|
||||
%b = load double, double addrspace(1)* %gep, align 8
|
||||
|
@ -182,7 +182,7 @@ define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double a
|
|||
; SI: buffer_store_dwordx2 [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
|
||||
%b = load double, double addrspace(1)* %gep, align 8
|
||||
|
@ -200,7 +200,7 @@ define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double a
|
|||
; SI: buffer_store_dwordx2 [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
|
||||
%a = load double, double addrspace(1)* %gep, align 8
|
||||
|
@ -218,7 +218,7 @@ define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double a
|
|||
; SI: buffer_store_dwordx2 [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
|
||||
%a = load double, double addrspace(1)* %gep, align 8
|
||||
|
@ -293,7 +293,7 @@ define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.0, align 4
|
||||
|
||||
|
@ -309,7 +309,7 @@ define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float a
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.0, align 4
|
||||
|
||||
|
@ -326,7 +326,7 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
@ -348,7 +348,7 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa
|
|||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
; GCN: s_barrier
|
||||
define void @test_barrier(i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp
|
||||
store i32 %tmp, i32 addrspace(1)* %tmp1
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
|
@ -20,7 +20,7 @@ entry:
|
|||
}
|
||||
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #2
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #2
|
||||
declare i32 @llvm.r600.read.local.size.x() #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
|
@ -27,7 +27,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
|
|||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
|
||||
%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
|
||||
%x = load double, double addrspace(1)* %gep
|
||||
|
@ -60,7 +60,7 @@ define void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
declare double @llvm.round.f64(double) #1
|
||||
declare <2 x double> @llvm.round.v2f64(<2 x double>) #1
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
declare float @llvm.fma.f32(float, float, float) #0
|
||||
declare float @llvm.fmuladd.f32(float, float, float) #0
|
||||
|
@ -32,7 +32,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #0
|
|||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; SI-STD: buffer_store_dword [[C]]
|
||||
define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -71,7 +71,7 @@ define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrsp
|
|||
; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -108,7 +108,7 @@ define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float a
|
|||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; SI-STD: buffer_store_dword [[C]]
|
||||
define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -138,7 +138,7 @@ define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrsp
|
|||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -175,7 +175,7 @@ define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float a
|
|||
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -210,7 +210,7 @@ define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, fl
|
|||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -246,7 +246,7 @@ define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float a
|
|||
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -282,7 +282,7 @@ define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, fl
|
|||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -320,7 +320,7 @@ define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float a
|
|||
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -363,7 +363,7 @@ define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %ou
|
|||
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -408,7 +408,7 @@ define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %ou
|
|||
|
||||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -454,7 +454,7 @@ define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %o
|
|||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -500,7 +500,7 @@ define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %o
|
|||
; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
@ -546,7 +546,7 @@ define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %o
|
|||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
|
||||
; FUNC-LABEL: {{^}}mad_sub_f32:
|
||||
|
@ -10,7 +10,7 @@ declare float @llvm.fabs.f32(float) #0
|
|||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -34,7 +34,7 @@ define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrs
|
|||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -55,7 +55,7 @@ define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float a
|
|||
; SI: v_mul_f64
|
||||
; SI: v_add_f64
|
||||
define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -79,7 +79,7 @@ define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double add
|
|||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -104,7 +104,7 @@ define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float
|
|||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -125,7 +125,7 @@ define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, fl
|
|||
; FUNC-LABEL: {{^}}neg_neg_mad_f32:
|
||||
; SI: v_mac_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -151,7 +151,7 @@ define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float a
|
|||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
|
||||
%add1 = add i64 %tid.ext, 1
|
||||
|
@ -175,7 +175,7 @@ define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float
|
|||
; SI: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; SI: buffer_store_dword [[R2]]
|
||||
define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -196,7 +196,7 @@ define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in)
|
|||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
; FIXME: Enable VI
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
|
||||
; GCN-LABEL: {{^}}madak_f32:
|
||||
|
@ -11,7 +11,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
|
|||
; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
||||
; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
|
||||
define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -38,7 +38,7 @@ define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noa
|
|||
; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]]
|
||||
; GCN: s_endpgm
|
||||
define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
||||
|
@ -65,7 +65,7 @@ define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1
|
|||
; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
||||
; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
|
||||
define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
||||
|
@ -85,7 +85,7 @@ define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addr
|
|||
; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
||||
; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
|
||||
define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -107,7 +107,7 @@ define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrsp
|
|||
; GCN-NOT: v_madak_f32
|
||||
; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
|
||||
define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
||||
|
@ -126,7 +126,7 @@ define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)*
|
|||
; GCN-NOT: v_madak_f32
|
||||
; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
|
||||
define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
||||
|
@ -154,7 +154,7 @@ define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwin
|
|||
; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
|
||||
; GCN: s_endpgm
|
||||
define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -176,7 +176,7 @@ define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float
|
|||
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
|
||||
; GCN: s_endpgm
|
||||
define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
|
||||
; GCN-LABEL: {{^}}madmk_f32:
|
||||
|
@ -9,7 +9,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
|
|||
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
|
||||
define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -32,7 +32,7 @@ define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noa
|
|||
; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]]
|
||||
; GCN: s_endpgm
|
||||
define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
|
||||
|
@ -61,7 +61,7 @@ define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1
|
|||
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
|
||||
define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -80,7 +80,7 @@ define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrsp
|
|||
; GCN: v_mac_f32_e32
|
||||
; GCN: s_endpgm
|
||||
define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
||||
%mul = fmul float %a, 10.0
|
||||
|
@ -94,7 +94,7 @@ define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b)
|
|||
; GCN: v_mad_f32
|
||||
; GCN: s_endpgm
|
||||
define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep.0, align 4
|
||||
|
@ -110,7 +110,7 @@ define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)*
|
|||
; GCN: v_mac_f32_e32
|
||||
; GCN: s_endpgm
|
||||
define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%b = load float, float addrspace(1)* %gep.0, align 4
|
||||
|
@ -126,7 +126,7 @@ define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float add
|
|||
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
|
||||
define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -147,7 +147,7 @@ define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float
|
|||
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
|
||||
define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -168,7 +168,7 @@ define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float
|
|||
; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
|
||||
; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
|
||||
define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
|
||||
; SI: v_max_i32_e32
|
||||
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -23,7 +23,7 @@ define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
|
|||
; SI: v_max_i32_e32
|
||||
; SI: v_max_i32_e32
|
||||
define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid
|
||||
|
@ -58,7 +58,7 @@ define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
|
|||
; SI: buffer_load_sbyte
|
||||
; SI: v_max_i32_e32
|
||||
define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
|
@ -91,7 +91,7 @@ define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
|
|||
; FUNC-LABEL: @v_test_imax_sgt_i32
|
||||
; SI: v_max_i32_e32
|
||||
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -115,7 +115,7 @@ define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
|
|||
; FUNC-LABEL: @v_test_umax_uge_i32
|
||||
; SI: v_max_u32_e32
|
||||
define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -154,7 +154,7 @@ define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <
|
|||
; SI: buffer_load_ubyte
|
||||
; SI: v_max_u32_e32
|
||||
define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
|
@ -169,7 +169,7 @@ define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i
|
|||
; FUNC-LABEL: @v_test_umax_ugt_i32
|
||||
; SI: v_max_u32_e32
|
||||
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @v_test_imax3_sgt_i32
|
||||
; SI: v_max3_i32
|
||||
define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
|
||||
|
@ -24,7 +24,7 @@ define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
|
|||
; FUNC-LABEL: @v_test_umax3_ugt_i32
|
||||
; SI: v_max3_u32
|
||||
define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
|
||||
|
|
|
@ -376,7 +376,7 @@ define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %
|
|||
%w = load i32, i32 addrspace(1)* %in.gep.3
|
||||
|
||||
; Make sure the barrier doesn't stop this
|
||||
tail call void @llvm.AMDGPU.barrier.local() #1
|
||||
tail call void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
store i32 %w, i32 addrspace(1)* %out.gep.3
|
||||
store i32 %z, i32 addrspace(1)* %out.gep.2
|
||||
|
@ -413,7 +413,7 @@ define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %
|
|||
%w = load i32, i32 addrspace(1)* %in.gep.3
|
||||
|
||||
; Make sure the barrier doesn't stop this
|
||||
tail call void @llvm.AMDGPU.barrier.local() #1
|
||||
tail call void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
store i32 %w, i32 addrspace(1)* %out
|
||||
store i32 %z, i32 addrspace(1)* %out.gep.1
|
||||
|
@ -705,7 +705,7 @@ define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x dou
|
|||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.local() #1
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { convergent nounwind }
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
|
||||
; SI: v_min_i32_e32
|
||||
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -101,7 +101,7 @@ define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <
|
|||
; FUNC-LABEL: @v_test_imin_slt_i32
|
||||
; SI: v_min_i32_e32
|
||||
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -153,7 +153,7 @@ define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
|
|||
; FUNC-LABEL: @v_test_umin_ule_i32
|
||||
; SI: v_min_u32_e32
|
||||
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -172,7 +172,7 @@ define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
|
|||
; SI-NOT: v_min_u32_e32
|
||||
; SI: s_endpgm
|
||||
define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
|
||||
|
@ -195,7 +195,7 @@ define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
|
|||
; FUNC-LABEL: @v_test_umin_ult_i32
|
||||
; SI: v_min_u32_e32
|
||||
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
|
@ -212,7 +212,7 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
|
|||
; SI: buffer_load_ubyte
|
||||
; SI: v_min_u32_e32
|
||||
define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
|
@ -240,7 +240,7 @@ define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
|
|||
; SI-NOT: v_min
|
||||
; SI: s_endpgm
|
||||
define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @v_test_imin3_slt_i32
|
||||
; SI: v_min3_i32
|
||||
define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
|
||||
|
@ -24,7 +24,7 @@ define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
|
|||
; FUNC-LABEL: @v_test_umin3_ult_i32
|
||||
; SI: v_min3_u32
|
||||
define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
%gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
|
||||
|
@ -44,7 +44,7 @@ define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
|
|||
; SI: v_min_i32
|
||||
; SI: v_min3_i32
|
||||
define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%tid2 = mul i32 %tid, 2
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
|
@ -78,7 +78,7 @@ define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %ap
|
|||
; FUNC-LABEL: @v_test_umin3_2_uses
|
||||
; SI-NOT: v_min3
|
||||
define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%tid2 = mul i32 %tid, 2
|
||||
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
|
||||
|
|
|
@ -7,12 +7,12 @@
|
|||
; Check that moving the pointer out of the resource descriptor to
|
||||
; vaddr works for atomics.
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_max_i32:
|
||||
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}}
|
||||
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
|
||||
%ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
|
||||
%xor = xor i32 %tid, 1
|
||||
|
@ -32,7 +32,7 @@ exit:
|
|||
; GCN-LABEL: {{^}}atomic_max_i32_noret:
|
||||
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}}
|
||||
define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
|
||||
%ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
|
||||
%xor = xor i32 %tid, 1
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
|
||||
;;;==========================================================================;;;
|
||||
;;; MUBUF LOAD TESTS
|
||||
|
@ -170,7 +170,7 @@ define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
|
|||
; CHECK-LABEL: {{^}}store_vgpr_ptr:
|
||||
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
store i32 99, i32 addrspace(1)* %out.gep, align 4
|
||||
ret void
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; Make sure we don't turn the 32-bit argument load into a 16-bit
|
||||
; load. There aren't extending scalar loads, so that would require
|
||||
|
@ -22,7 +22,7 @@ define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounw
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_short v
|
||||
define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
|
||||
%load = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -44,7 +44,7 @@ define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwin
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_byte v
|
||||
define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
%load = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -66,7 +66,7 @@ define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwin
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_byte v
|
||||
define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
|
||||
%load = load i32, i32 addrspace(1)* %gep.in
|
||||
|
@ -88,7 +88,7 @@ define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounw
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_dword v
|
||||
define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%load = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -111,7 +111,7 @@ define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_dword v
|
||||
define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||
%load = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -135,7 +135,7 @@ define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwin
|
|||
; SI: buffer_load_ubyte v
|
||||
; SI: buffer_store_byte v
|
||||
define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
%load = load i16, i16 addrspace(1)* %gep.in
|
||||
|
@ -158,7 +158,7 @@ define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_byte v
|
||||
define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
%load = load i64, i64 addrspace(1)* %gep.in
|
||||
|
@ -181,7 +181,7 @@ define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwin
|
|||
; SI: buffer_load_dword v
|
||||
; SI: buffer_store_byte v
|
||||
define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
|
||||
%load = load i64, i64 addrspace(1)* %gep.in
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}fold_sgpr:
|
||||
; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
|
||||
|
@ -8,7 +8,7 @@ entry:
|
|||
br i1 %tmp0, label %if, label %endif
|
||||
|
||||
if:
|
||||
%id = call i32 @llvm.r600.read.tidig.x()
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%offset = add i32 %fold, %id
|
||||
%tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
|
||||
store i32 0, i32 addrspace(1)* %tmp1
|
||||
|
@ -27,7 +27,7 @@ entry:
|
|||
br i1 %tmp0, label %if, label %endif
|
||||
|
||||
if:
|
||||
%id = call i32 @llvm.r600.read.tidig.x()
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%val = or i32 %id, %fold
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
br label %endif
|
||||
|
@ -63,7 +63,7 @@ entry:
|
|||
|
||||
define void @vector_inline(<4 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp0, 1
|
||||
%tmp2 = add i32 %tmp0, 2
|
||||
%tmp3 = add i32 %tmp0, 3
|
||||
|
@ -82,7 +82,7 @@ entry:
|
|||
|
||||
define void @imm_one_use(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = xor i32 %tmp0, 100
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
|
@ -96,7 +96,7 @@ entry:
|
|||
|
||||
define void @vector_imm(<4 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp0, 1
|
||||
%tmp2 = add i32 %tmp0, 2
|
||||
%tmp3 = add i32 %tmp0, 3
|
||||
|
@ -109,5 +109,6 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
attributes #0 = { readnone }
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
|
|
@ -8,10 +8,10 @@
|
|||
; During live interval construction, the first sub register def is
|
||||
; incorrectly marked as dead.
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %in.gep
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare float @llvm.sqrt.f32(float) nounwind readnone
|
||||
declare double @llvm.sqrt.f64(double) nounwind readnone
|
||||
|
||||
|
@ -56,7 +56,7 @@ define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind
|
|||
|
||||
; SI: s_endpgm
|
||||
define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.r600.read.tidig.y() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #0
|
||||
|
||||
; In this test both the pointer and the offset operands to the
|
||||
; BUFFER_LOAD instructions end up being stored in vgprs. This
|
||||
|
@ -26,8 +26,8 @@ declare i32 @llvm.r600.read.tidig.y() #0
|
|||
|
||||
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp1 = call i32 @llvm.r600.read.tidig.y()
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp3 = sext i32 %tmp1 to i64
|
||||
br label %loop
|
||||
|
@ -87,7 +87,7 @@ endif: ; preds = %else, %if
|
|||
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||
define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
|
@ -107,7 +107,7 @@ entry:
|
|||
; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||
define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
|
||||
%tmp4 = load i32, i32 addrspace(2)* %tmp3
|
||||
|
@ -127,7 +127,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
|
||||
define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
|
||||
%tmp4 = load i64, i64 addrspace(2)* %tmp3
|
||||
|
@ -149,7 +149,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
|
||||
define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
|
||||
%tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
|
||||
|
@ -185,7 +185,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx4
|
||||
define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
|
||||
%tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
|
||||
|
@ -234,7 +234,7 @@ entry:
|
|||
; GCN: s_endpgm
|
||||
define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
|
||||
%tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
|
||||
%tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
|
||||
|
@ -251,7 +251,7 @@ entry:
|
|||
; GCN-HSA: flat_store_dword [[ADD]]
|
||||
define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
|
@ -265,7 +265,7 @@ entry:
|
|||
; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}}
|
||||
define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
|
@ -279,7 +279,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
|
||||
define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = add i32 %tmp, 4
|
||||
%tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
|
||||
%tmp3 = load i32, i32 addrspace(2)* %tmp2
|
||||
|
@ -294,7 +294,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx4
|
||||
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
|
||||
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
|
||||
|
@ -317,7 +317,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx4
|
||||
define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
|
||||
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
|
||||
|
@ -354,7 +354,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx4
|
||||
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
|
||||
%tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
|
||||
|
@ -389,7 +389,7 @@ entry:
|
|||
; GCN-HSA: flat_load_dwordx4
|
||||
define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
|
||||
entry:
|
||||
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
|
||||
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
|
||||
%tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
|
||||
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; FIXME: This currently doesn't do a great job of clustering the
|
||||
; loads, which end up with extra moves between them. Right now, it
|
||||
; seems the only thing areLoadsFromSameBasePtr is accomplishing is
|
||||
|
|
|
@ -40,7 +40,7 @@ endif:
|
|||
|
||||
define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid_f = uitofp i32 %tid to float
|
||||
%tmp1 = fcmp ueq float %tid_f, 0.0
|
||||
br i1 %tmp1, label %if, label %else
|
||||
|
@ -77,7 +77,7 @@ endif:
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tmp1 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp1, label %if, label %else
|
||||
|
||||
|
@ -100,6 +100,6 @@ endif:
|
|||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { readnone }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Test with inline immediate
|
||||
|
||||
|
@ -10,7 +10,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
|
||||
%val = load i32, i32 addrspace(1)* %ptr, align 4
|
||||
%add = add i32 %val, 9
|
||||
|
@ -26,7 +26,7 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
|||
; SI-DAG: buffer_store_dword [[SHLREG]]
|
||||
; SI: s_endpgm
|
||||
define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
|
||||
%val = load i32, i32 addrspace(1)* %ptr, align 4
|
||||
%add = add i32 %val, 9
|
||||
|
@ -44,7 +44,7 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1
|
|||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
|
||||
%val = load i32, i32 addrspace(1)* %ptr, align 4
|
||||
%shl = add i32 %val, 999
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
; LDS globals.
|
||||
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
@lds0 = addrspace(3) global [512 x float] undef, align 4
|
||||
@lds1 = addrspace(3) global [512 x float] undef, align 4
|
||||
|
@ -20,7 +20,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
|
|||
; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
|
||||
; SI: s_endpgm
|
||||
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -40,7 +40,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
|
|||
; SI-DAG: buffer_store_dword [[ADDUSE]]
|
||||
; SI: s_endpgm
|
||||
define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -56,7 +56,7 @@ define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %ad
|
|||
; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
|
||||
; SI: s_endpgm
|
||||
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 65535
|
||||
%arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
|
||||
%val0 = load i8, i8 addrspace(3)* %arrayidx0
|
||||
|
@ -74,7 +74,7 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
|
|||
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
|
||||
; SI: s_endpgm
|
||||
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 64
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
||||
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -90,7 +90,7 @@ define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
|
|||
; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
||||
store float 1.0, float addrspace(3)* %arrayidx0, align 4
|
||||
|
@ -105,7 +105,7 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
|
|||
@lds2 = addrspace(3) global [512 x i32] undef, align 4
|
||||
|
||||
; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
; %idx.0 = add nsw i32 %tid.x, 2
|
||||
; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
|
||||
|
@ -120,7 +120,7 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
|
|||
; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
|
||||
|
@ -135,7 +135,7 @@ define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace
|
|||
; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -149,7 +149,7 @@ define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
|
|||
; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -163,7 +163,7 @@ define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -177,7 +177,7 @@ define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -191,7 +191,7 @@ define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -205,7 +205,7 @@ define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -215,7 +215,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
}
|
||||
|
||||
; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
; %idx.0 = add nsw i32 %tid.x, 2
|
||||
; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -229,7 +229,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -243,7 +243,7 @@ define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -257,7 +257,7 @@ define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
@ -271,7 +271,7 @@ define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
|
|||
; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
; SI: s_endpgm
|
||||
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
||||
%val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare void @llvm.amdgcn.s.barrier() #2
|
||||
|
||||
|
||||
@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
|
||||
|
@ -61,7 +61,7 @@ define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace
|
|||
|
||||
%tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
|
||||
store i32 99, i32 addrspace(1)* %gptr, align 4
|
||||
call void @llvm.AMDGPU.barrier.local() #2
|
||||
call void @llvm.amdgcn.s.barrier() #2
|
||||
%tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
|
||||
|
||||
%add = add nsw i32 %tmp1, %tmp2
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
|
||||
; SI: v_cvt_f64_i32_e32
|
||||
|
@ -52,7 +52,7 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
|
|||
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep, align 8
|
||||
%result = sitofp i64 %val to double
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
|
||||
|
@ -28,7 +28,7 @@ define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
|
|||
; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_store_dword [[SIGN_SEL]]
|
||||
define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %in.gep
|
||||
|
@ -46,7 +46,7 @@ define void @s_sint_to_fp_v2i64(<2 x float> addrspace(1)* %out, <2 x i64> %in) #
|
|||
|
||||
; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64:
|
||||
define void @v_sint_to_fp_v4i64(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
|
||||
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
|
||||
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
|
||||
|
@ -55,7 +55,7 @@ define void @v_sint_to_fp_v4i64(<4 x float> addrspace(1)* %out, <4 x i64> addrsp
|
|||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
|
||||
; This is broken because the low half of the 64-bit add remains on the
|
||||
; SALU, but the upper half does not. The addc expects the carry bit
|
||||
|
@ -62,7 +62,7 @@ define void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i6
|
|||
; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
||||
define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
%load = load i32, i32 addrspace(1)* %gep
|
||||
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
|
||||
|
|
|
@ -35,14 +35,14 @@ define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly
|
|||
entry:
|
||||
%tmp = tail call i32 @llvm.r600.read.local.size.y()
|
||||
%tmp1 = tail call i32 @llvm.r600.read.local.size.z()
|
||||
%tmp2 = tail call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp3 = tail call i32 @llvm.r600.read.tidig.y()
|
||||
%tmp4 = tail call i32 @llvm.r600.read.tidig.z()
|
||||
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
|
||||
%tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
|
||||
%tmp6 = mul i32 %tmp2, %tmp
|
||||
%tmp10 = add i32 %tmp3, %tmp6
|
||||
%tmp11 = mul i32 %tmp10, %tmp1
|
||||
%tmp9 = add i32 %tmp11, %tmp4
|
||||
%x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
|
||||
%x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
%x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
|
||||
%mul.26.i = mul i32 %x.i.12.i, %x.i.i
|
||||
%add.i = add i32 %tmp2, %mul.26.i
|
||||
|
@ -81,13 +81,13 @@ entry:
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.y() #1
|
||||
|
@ -96,10 +96,10 @@ declare i32 @llvm.r600.read.local.size.y() #1
|
|||
declare i32 @llvm.r600.read.local.size.z() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.y() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.z() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.z() #1
|
||||
|
||||
attributes #0 = { norecurse nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
; CHECK: s_barrier
|
||||
; CHECK: s_endpgm
|
||||
; Function Attrs: nounwind
|
||||
define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
|
||||
define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
|
||||
bb:
|
||||
%tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
|
||||
%tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
|
||||
|
@ -21,7 +21,7 @@ bb:
|
|||
%tmp16 = add i32 %tmp13, 1
|
||||
%tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
|
||||
store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
|
||||
tail call void @llvm.AMDGPU.barrier.local() #2
|
||||
tail call void @llvm.amdgcn.s.barrier()
|
||||
%tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
|
||||
%tmp26 = sext i32 %tmp25 to i64
|
||||
%tmp27 = sext i32 %arg4 to i64
|
||||
|
@ -37,6 +37,7 @@ bb:
|
|||
}
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.AMDGPU.barrier.local() #2
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
attributes #2 = { convergent nounwind }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { convergent nounwind }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
|
||||
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
|
@ -10,7 +10,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
|||
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
|
||||
%val = load i64, i64 addrspace(1)* %gep, align 8
|
||||
%result = uitofp i64 %val to double
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
|
||||
|
@ -9,7 +9,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
|
|||
; All nan values are converted to 0xffffffff
|
||||
; SI: s_endpgm
|
||||
define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
|
||||
%idx = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%idx = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
|
||||
%f = load float, float addrspace(1)* %fptr
|
||||
%setcc = icmp ne i32 %c, 0
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; SI-LABEL: @test_if
|
||||
; Make sure the i1 values created by the cfg structurizer pass are
|
||||
|
@ -54,7 +54,7 @@ end:
|
|||
; SI: s_or_b64 exec, exec, [[BR_SREG]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%is.0 = icmp ne i32 %tid, 0
|
||||
br i1 %is.0, label %store, label %exit
|
||||
|
||||
|
@ -86,7 +86,7 @@ exit:
|
|||
|
||||
define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
entry:
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%is.0 = icmp ne i32 %tid, 0
|
||||
%limit = add i32 %tid, 64
|
||||
br i1 %is.0, label %loop, label %exit
|
||||
|
@ -152,7 +152,7 @@ exit:
|
|||
|
||||
define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
|
||||
bb:
|
||||
%tmp = tail call i32 @llvm.r600.read.tidig.x() #0
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tmp4 = sext i32 %tmp to i64
|
||||
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
|
||||
%tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
|
||||
|
|
|
@ -11,10 +11,6 @@
|
|||
|
||||
; FIXME: The same register is initialized to 0 for every spill.
|
||||
|
||||
declare i32 @llvm.r600.read.tgid.x() #1
|
||||
declare i32 @llvm.r600.read.tgid.y() #1
|
||||
declare i32 @llvm.r600.read.tgid.z() #1
|
||||
|
||||
; GCN-LABEL: {{^}}spill_vgpr_compute:
|
||||
|
||||
; GCN: s_mov_b32 s16, s3
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; Test that we correctly commute a sub instruction
|
||||
; FUNC-LABEL: {{^}}sub_rev:
|
||||
|
@ -10,7 +10,7 @@
|
|||
|
||||
define void @sub_rev(i32 addrspace(1)* %out, <4 x i32> %sgpr, i32 %cond) {
|
||||
entry:
|
||||
%vgpr = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%tmp = icmp eq i32 %cond, 0
|
||||
br i1 %tmp, label %if, label %else
|
||||
|
||||
|
@ -37,7 +37,7 @@ endif: ; preds = %else, %if
|
|||
; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
|
||||
define void @add_fold(float addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tmp1 = uitofp i32 %tmp to float
|
||||
%tmp2 = fadd float %tmp1, 1.024000e+03
|
||||
store float %tmp2, float addrspace(1)* %out
|
||||
|
@ -45,7 +45,7 @@ entry:
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { readnone }
|
||||
|
|
|
@ -18,7 +18,7 @@ main_body:
|
|||
%tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
|
||||
%tmp12 = extractelement <4 x float> %tmp11, i32 0
|
||||
%tmp13 = extractelement <4 x float> %tmp11, i32 1
|
||||
call void @llvm.AMDGPU.barrier.global() #1
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
%tmp14 = extractelement <4 x float> %tmp11, i32 2
|
||||
; %tmp15 = extractelement <4 x float> %tmp11, i32 3
|
||||
%tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
|
||||
|
@ -71,7 +71,7 @@ main_body:
|
|||
|
||||
|
||||
; Function Attrs: convergent nounwind
|
||||
declare void @llvm.AMDGPU.barrier.global() #1
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
|
||||
|
|
|
@ -7,11 +7,11 @@
|
|||
; can do to avoid this.
|
||||
|
||||
declare void @llvm.write_register.i32(metadata, i32) #0
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
|
||||
define void @write_vgpr_into_sgpr() {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
call void @llvm.write_register.i32(metadata !0, i32 %tid)
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
|
||||
bb:
|
||||
%tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
|
||||
%tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1
|
||||
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%tmp3 = sext i32 %tmp2 to i64
|
||||
%tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
|
||||
%tmp5 = load float, float addrspace(1)* %tmp4, align 4
|
||||
|
@ -43,7 +43,7 @@ bb15: ; preds = %bb14, %bb8
|
|||
}
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float) #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @test_unroll_convergent_barrier(
|
||||
; CHECK: call void @llvm.AMDGPU.barrier.global()
|
||||
; CHECK: call void @llvm.AMDGPU.barrier.global()
|
||||
; CHECK: call void @llvm.AMDGPU.barrier.global()
|
||||
; CHECK: call void @llvm.AMDGPU.barrier.global()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK: call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NOT: br
|
||||
define void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
|
||||
entry:
|
||||
|
@ -16,7 +16,7 @@ for.body: ; preds = %for.body, %entry
|
|||
%arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
|
||||
%arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
|
||||
%load = load i32, i32 addrspace(1)* %arrayidx.in
|
||||
call void @llvm.AMDGPU.barrier.global() #1
|
||||
call void @llvm.amdgcn.s.barrier() #1
|
||||
%add = add i32 %load, %sum.02
|
||||
store i32 %add, i32 addrspace(1)* %arrayidx.out
|
||||
%indvars.iv.next = add i32 %indvars.iv, 1
|
||||
|
@ -27,7 +27,7 @@ for.end: ; preds = %for.body, %entry
|
|||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.barrier.global() #1
|
||||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
||||
|
|
|
@ -63,17 +63,6 @@ ENDIF28: ; preds = %ENDIF
|
|||
br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDGPU.clamp.f32(float, float, float) #2
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { readnone }
|
||||
|
||||
!0 = !{!1, !1, i64 0, i32 1}
|
||||
!1 = !{!"const", null}
|
||||
|
|
Loading…
Reference in New Issue