From d2e83929a96630c8d790843865e6671d1addbae2 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Thu, 31 Oct 2013 15:50:48 +0000
Subject: [PATCH] R600: Set the noduplicate attribute on barrier() intrinsics

This will prevent LLVM optimization passes from creating illegal uses
of the barrier() intrinsic (e.g. calling barrier() from a conditional
that is not executed by all threads).

llvm-svn: 193753
---
 libclc/r600/lib/SOURCES                       |  1 -
 libclc/r600/lib/synchronization/barrier.cl    | 17 ++++-----
 .../r600/lib/synchronization/barrier_impl.ll  | 35 ++++++++++++++-----
 3 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/libclc/r600/lib/SOURCES b/libclc/r600/lib/SOURCES
index aac6d8f9a74f..d9fc897dc7d2 100644
--- a/libclc/r600/lib/SOURCES
+++ b/libclc/r600/lib/SOURCES
@@ -8,4 +8,3 @@ workitem/get_global_size.ll
 synchronization/barrier.cl
 synchronization/barrier_impl.ll
 shared/vload.cl
-shared/vstore.cl
\ No newline at end of file
diff --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl
index ac0b4b3b2bec..6f2900b06eef 100644
--- a/libclc/r600/lib/synchronization/barrier.cl
+++ b/libclc/r600/lib/synchronization/barrier.cl
@@ -1,15 +1,10 @@
 
 #include <clc/clc.h>
 
-void barrier_local(void);
-void barrier_global(void);
-
-void barrier(cl_mem_fence_flags flags) {
-  if (flags & CLK_LOCAL_MEM_FENCE) {
-    barrier_local();
-  }
-
-  if (flags & CLK_GLOBAL_MEM_FENCE) {
-    barrier_global();
-  }
+_CLC_DEF int __clc_clk_local_mem_fence() {
+  return CLK_LOCAL_MEM_FENCE;
+}
+
+_CLC_DEF int __clc_clk_global_mem_fence() {
+  return CLK_GLOBAL_MEM_FENCE;
 }
diff --git a/libclc/r600/lib/synchronization/barrier_impl.ll b/libclc/r600/lib/synchronization/barrier_impl.ll
index 99ac01856e2b..3d8ee66bab6e 100644
--- a/libclc/r600/lib/synchronization/barrier_impl.ll
+++ b/libclc/r600/lib/synchronization/barrier_impl.ll
@@ -1,12 +1,29 @@
-declare void @llvm.AMDGPU.barrier.local() nounwind
-declare void @llvm.AMDGPU.barrier.global() nounwind
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
 
-define void @barrier_local() nounwind alwaysinline {
-  call void @llvm.AMDGPU.barrier.local()
-  ret void
-}
-
-define void @barrier_global() nounwind alwaysinline {
-  call void @llvm.AMDGPU.barrier.global()
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+barrier_local_test:
+  %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+  %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
+  %1 = icmp ne i32 %0, 0
+  br i1 %1, label %barrier_local, label %barrier_global_test
+
+barrier_local:
+  call void @llvm.AMDGPU.barrier.local() noduplicate
+  br label %barrier_global_test
+
+barrier_global_test:
+  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+  %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
+  %3 = icmp ne i32 %2, 0
+  br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+  call void @llvm.AMDGPU.barrier.global() noduplicate
+  br label %done
+
+done:
   ret void
 }