forked from OSchip/llvm-project
Split sources for amdgcn and r600
Most files remain in a common amdgpu directory. Also switches barriers to use convergent, and use llvm.amdgcn.s.barrier. This now requires 3.9/trunk to build amdgcn. llvm-svn: 260777
This commit is contained in:
parent
0a5e166a0b
commit
a48e15c6cb
|
@ -0,0 +1 @@
|
|||
synchronization/barrier_impl.ll
|
|
@ -0,0 +1,32 @@
|
|||
declare i32 @__clc_clk_local_mem_fence() #1
|
||||
declare i32 @__clc_clk_global_mem_fence() #1
|
||||
declare void @llvm.amdgcn.s.barrier() #0
|
||||
|
||||
define void @barrier(i32 %flags) #2 {
|
||||
barrier_local_test:
|
||||
%CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
|
||||
%0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
|
||||
%1 = icmp ne i32 %0, 0
|
||||
br i1 %1, label %barrier_local, label %barrier_global_test
|
||||
|
||||
barrier_local:
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
br label %barrier_global_test
|
||||
|
||||
barrier_global_test:
|
||||
%CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
|
||||
%2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
|
||||
%3 = icmp ne i32 %2, 0
|
||||
br i1 %3, label %barrier_global, label %done
|
||||
|
||||
barrier_global:
|
||||
call void @llvm.amdgcn.s.barrier()
|
||||
br label %done
|
||||
|
||||
done:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind convergent }
|
||||
attributes #1 = { nounwind alwaysinline }
|
||||
attributes #2 = { nounwind convergent alwaysinline }
|
|
@ -0,0 +1,2 @@
|
|||
workitem/get_group_id.cl
|
||||
workitem/get_global_size.cl
|
|
@ -0,0 +1,25 @@
|
|||
atomic/atomic.cl
|
||||
math/ldexp.cl
|
||||
math/nextafter.cl
|
||||
math/sqrt.cl
|
||||
workitem/get_num_groups.ll
|
||||
workitem/get_group_id.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_local_id.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_work_dim.ll
|
||||
synchronization/barrier.cl
|
||||
image/get_image_width.cl
|
||||
image/get_image_height.cl
|
||||
image/get_image_depth.cl
|
||||
image/get_image_channel_data_type.cl
|
||||
image/get_image_channel_order.cl
|
||||
image/get_image_attributes_impl.ll
|
||||
image/read_imagef.cl
|
||||
image/read_imagei.cl
|
||||
image/read_imageui.cl
|
||||
image/read_image_impl.ll
|
||||
image/write_imagef.cl
|
||||
image/write_imagei.cl
|
||||
image/write_imageui.cl
|
||||
image/write_image_impl.ll
|
|
@ -69,8 +69,8 @@ llvm_version = string.split(string.replace(llvm_config(['--version']), 'svn', ''
|
|||
llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10
|
||||
llvm_string_version = 'LLVM' + llvm_version[0] + '.' + llvm_version[1]
|
||||
|
||||
if llvm_int_version < 370:
|
||||
print "libclc requires LLVM >= 3.7"
|
||||
if llvm_int_version < 390:
|
||||
print "libclc requires LLVM >= 3.9"
|
||||
sys.exit(1)
|
||||
|
||||
llvm_system_libs = llvm_config(['--system-libs'])
|
||||
|
@ -175,8 +175,8 @@ for target in targets:
|
|||
subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
|
||||
subdirs.append("%s-%s" % (arch, t_os))
|
||||
subdirs.append(arch)
|
||||
if arch == 'amdgcn':
|
||||
subdirs.append('r600')
|
||||
if arch == 'amdgcn' or arch == 'r600':
|
||||
subdirs.append('amdgpu')
|
||||
|
||||
incdirs = filter(os.path.isdir,
|
||||
[os.path.join(srcdir, subdir, 'include') for subdir in subdirs])
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
workitem/get_group_id.cl
|
||||
workitem/get_global_size.cl
|
|
@ -1,26 +1 @@
|
|||
atomic/atomic.cl
|
||||
math/ldexp.cl
|
||||
math/nextafter.cl
|
||||
math/sqrt.cl
|
||||
workitem/get_num_groups.ll
|
||||
workitem/get_group_id.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_local_id.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_work_dim.ll
|
||||
synchronization/barrier.cl
|
||||
synchronization/barrier_impl.ll
|
||||
image/get_image_width.cl
|
||||
image/get_image_height.cl
|
||||
image/get_image_depth.cl
|
||||
image/get_image_channel_data_type.cl
|
||||
image/get_image_channel_order.cl
|
||||
image/get_image_attributes_impl.ll
|
||||
image/read_imagef.cl
|
||||
image/read_imagei.cl
|
||||
image/read_imageui.cl
|
||||
image/read_image_impl.ll
|
||||
image/write_imagef.cl
|
||||
image/write_imagei.cl
|
||||
image/write_imageui.cl
|
||||
image/write_image_impl.ll
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
|
||||
declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
|
||||
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
|
||||
declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
|
||||
declare i32 @__clc_clk_local_mem_fence() #1
|
||||
declare i32 @__clc_clk_global_mem_fence() #1
|
||||
declare void @llvm.AMDGPU.barrier.local() #0
|
||||
declare void @llvm.AMDGPU.barrier.global() #0
|
||||
|
||||
define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
|
||||
define void @barrier(i32 %flags) #2 {
|
||||
barrier_local_test:
|
||||
%CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
|
||||
%0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
|
||||
|
@ -11,7 +11,7 @@ barrier_local_test:
|
|||
br i1 %1, label %barrier_local, label %barrier_global_test
|
||||
|
||||
barrier_local:
|
||||
call void @llvm.AMDGPU.barrier.local() noduplicate
|
||||
call void @llvm.AMDGPU.barrier.local()
|
||||
br label %barrier_global_test
|
||||
|
||||
barrier_global_test:
|
||||
|
@ -21,9 +21,13 @@ barrier_global_test:
|
|||
br i1 %3, label %barrier_global, label %done
|
||||
|
||||
barrier_global:
|
||||
call void @llvm.AMDGPU.barrier.global() noduplicate
|
||||
call void @llvm.AMDGPU.barrier.global()
|
||||
br label %done
|
||||
|
||||
done:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind convergent }
|
||||
attributes #1 = { nounwind alwaysinline }
|
||||
attributes #2 = { nounwind convergent alwaysinline }
|
||||
|
|
Loading…
Reference in New Issue