forked from OSchip/llvm-project
AMDGPU: Add some tests for promotion of v2i64 scalar_to_vector
llvm-svn: 254087
This commit is contained in:
parent
61001bbc03
commit
d179481857
|
@ -634,6 +634,77 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; This requires handling of scalar_to_vector for v2i64 to avoid
|
||||
; scratch usage.
|
||||
; FIXME: This should use a single load and a single store.
|
||||
|
||||
; GCN-LABEL: {{^}}copy_v3i32_align4:
|
||||
; GCN-NOT: SCRATCH_RSRC_DWORD
|
||||
; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
|
||||
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-NOT: offen
|
||||
; GCN: s_waitcnt vmcnt
|
||||
; GCN-NOT: offen
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
|
||||
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
; Copies one <3 x i32> value from %in to %out; both pointers are in
; addrspace(1) and marked noalias. The load is given only 4-byte
; alignment, while the store carries no explicit alignment.
define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}copy_v3i64_align4:
|
||||
; GCN-NOT: SCRATCH_RSRC_DWORD
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
|
||||
; GCN-NOT: offen
|
||||
; GCN: s_waitcnt vmcnt
|
||||
; GCN-NOT: offen
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
; Copies one <3 x i64> value from %in to %out; both pointers are in
; addrspace(1) and marked noalias. The load is given only 4-byte
; alignment, while the store carries no explicit alignment.
define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
  store <3 x i64> %val, <3 x i64> addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}copy_v3f32_align4:
|
||||
; GCN-NOT: SCRATCH_RSRC_DWORD
|
||||
; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
|
||||
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-NOT: offen
|
||||
; GCN: s_waitcnt vmcnt
|
||||
; GCN-NOT: offen
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
; Loads a <3 x float> from %in with 4-byte alignment, adds the constant
; vector <1.0, 2.0, 4.0> to it, and stores the sum to %out (no explicit
; store alignment). Despite the "copy" in the name, the stored value is
; the result of the fadd, not the loaded value itself.
define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
  %loaded = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
  %sum = fadd <3 x float> %loaded, <float 1.0, float 2.0, float 4.0>
  store <3 x float> %sum, <3 x float> addrspace(1)* %out
  ret void
}
|
||||
|
||||
; GCN-LABEL: {{^}}copy_v3f64_align4:
|
||||
; GCN-NOT: SCRATCH_RSRC_DWORD
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
|
||||
; GCN-NOT: offen
|
||||
; GCN: s_waitcnt vmcnt
|
||||
; GCN-NOT: offen
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
|
||||
; GCN: ScratchSize: 0{{$}}
|
||||
; Loads a <3 x double> from %in with 4-byte alignment, adds the constant
; vector <1.0, 2.0, 4.0> to it, and stores the sum to %out (no explicit
; store alignment). Despite the "copy" in the name, the stored value is
; the result of the fadd, not the loaded value itself.
define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
  %loaded = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
  %sum = fadd <3 x double> %loaded, <double 1.0, double 2.0, double 4.0>
  store <3 x double> %sum, <3 x double> addrspace(1)* %out
  ret void
}
|
||||
|
||||
; External declaration of @llvm.AMDGPU.barrier.local, taking no arguments
; and returning void. It is not called by any function in this part of the
; file, and attribute group #1 is not defined in this part either --
; presumably both appear elsewhere in the test; verify.
declare void @llvm.AMDGPU.barrier.local() #1
|
||||
|
||||
; Attribute group #0 (nounwind), referenced by the copy_v3* functions above.
attributes #0 = { nounwind }
|
||||
|
|
Loading…
Reference in New Issue