AMDGPU: Add some tests for promotion of v2i64 scalar_to_vector

llvm-svn: 254087
This commit is contained in:
Matt Arsenault 2015-11-25 20:01:03 +00:00
parent 61001bbc03
commit d179481857
1 changed files with 71 additions and 0 deletions

View File

@ -634,6 +634,77 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
ret void
}
; This requires handling of scalar_to_vector for v2i64 to avoid
; scratch usage.
; FIXME: Should do single load and store
; GCN-LABEL: {{^}}copy_v3i32_align4:
; GCN-NOT: SCRATCH_RSRC_DWORD
; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-NOT: offen
; GCN: s_waitcnt vmcnt
; GCN-NOT: offen
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN: ScratchSize: 0{{$}}
define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
%vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}copy_v3i64_align4:
; GCN-NOT: SCRATCH_RSRC_DWORD
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN-NOT: offen
; GCN: s_waitcnt vmcnt
; GCN-NOT: offen
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN: ScratchSize: 0{{$}}
define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
%vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}copy_v3f32_align4:
; GCN-NOT: SCRATCH_RSRC_DWORD
; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-NOT: offen
; GCN: s_waitcnt vmcnt
; GCN-NOT: offen
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN: ScratchSize: 0{{$}}
define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
%vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
%fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
store <3 x float> %fadd, <3 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}copy_v3f64_align4:
; GCN-NOT: SCRATCH_RSRC_DWORD
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN-NOT: offen
; GCN: s_waitcnt vmcnt
; GCN-NOT: offen
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN: ScratchSize: 0{{$}}
define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
%vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
%fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
store <3 x double> %fadd, <3 x double> addrspace(1)* %out
ret void
}
declare void @llvm.AMDGPU.barrier.local() #1
attributes #0 = { nounwind }