2019-06-25 00:49:18 +08:00
|
|
|
// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY
|
|
|
|
// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEW
|
2017-10-07 03:34:40 +08:00
|
|
|
|
|
|
|
// This is initially assumed convergent, but can be deduced to not require it.
|
|
|
|
|
|
|
|
// CHECK-LABEL: define spir_func void @non_convfun() local_unnamed_addr #0
|
|
|
|
// CHECK: ret void
|
|
|
|
// Locally defined helper whose whole body is one volatile store of 0 through
// the pointer p. It carries no convergent or noduplicate attribute in source.
// noinline: presumably so the call sites in test_merge_if stay visible as
// calls in the emitted IR -- confirm.
__attribute__((noinline))
|
|
|
|
void non_convfun(void) {
|
|
|
|
// NOTE(review): p is never initialized before the store below; this looks
// intentional for a compile-only codegen test -- confirm before changing.
volatile int* p;
|
|
|
|
*p = 0;
|
|
|
|
}
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
void convfun(void) __attribute__((convergent));
|
|
|
|
void nodupfun(void) __attribute__((noduplicate));
|
|
|
|
|
2017-10-07 03:34:40 +08:00
|
|
|
// External functions should be assumed convergent.
|
2016-11-02 02:45:32 +08:00
|
|
|
void f(void);
|
|
|
|
void g(void);
|
|
|
|
|
|
|
|
// Test that the two ifs are merged and non_convfun is duplicated.
|
|
|
|
// The LLVM IR is equivalent to:
|
|
|
|
// if (a) {
|
|
|
|
// f();
|
|
|
|
// non_convfun();
|
|
|
|
// g();
|
|
|
|
// } else {
|
|
|
|
// non_convfun();
|
|
|
|
// }
|
|
|
|
//
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK-LABEL: define spir_func void @test_merge_if(i32 %a) local_unnamed_addr #1 {
|
|
|
|
// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_then]]:
|
|
|
|
// CHECK: tail call spir_func void @f()
|
|
|
|
// CHECK: tail call spir_func void @non_convfun()
|
|
|
|
// CHECK: tail call spir_func void @g()
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br label %[[if_end3:.+]]
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_end3_critedge]]:
|
|
|
|
// CHECK: tail call spir_func void @non_convfun()
|
|
|
|
// CHECK: br label %[[if_end3]]
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_end3]]:
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: ret void
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
// Calls f() when a is non-zero, then non_convfun() unconditionally, then g()
// under the same condition on a. The FileCheck directives preceding this
// function expect the two conditionals to be merged into a single branch,
// with the non_convfun() call appearing once on each side.
void test_merge_if(int a) {
|
|
|
|
if (a) {
|
|
|
|
f();
|
|
|
|
}
|
|
|
|
// The call sitting between the two identical conditions is the locally
// defined helper, which has no convergent attribute in source.
non_convfun();
|
|
|
|
if (a) {
|
|
|
|
g();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
|
|
|
|
// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
|
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
// Test that the two ifs are not merged.
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK-LABEL: define spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
|
|
|
|
// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
|
|
|
|
// CHECK: [[if_then]]:
|
|
|
|
// CHECK: tail call spir_func void @f()
|
|
|
|
// CHECK-NOT: call spir_func void @convfun()
|
|
|
|
// CHECK-NOT: call spir_func void @g()
|
|
|
|
// CHECK: br label %[[if_end]]
|
|
|
|
// CHECK: [[if_end]]:
|
|
|
|
// CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4:.+]]
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
|
|
|
|
// CHECK: [[if_then2]]:
|
|
|
|
// CHECK: tail call spir_func void @g()
|
|
|
|
// CHECK: br label %[[if_end3]]
|
|
|
|
// CHECK: [[if_end3]]:
|
|
|
|
// CHECK-LABEL: ret void
|
|
|
|
|
|
|
|
// Same shape as test_merge_if, but the call between the two conditionals is
// convfun(), which is declared with __attribute__((convergent)). The FileCheck
// directives preceding this function expect the two conditionals to stay
// separate, with exactly one convfun() call between them.
void test_no_merge_if(int a) {
|
|
|
|
if (a) {
|
|
|
|
f();
|
|
|
|
}
|
|
|
|
// Declared convergent; the expected IR keeps this call in the join block
// rather than copying it into either branch.
convfun();
|
|
|
|
if(a) {
|
|
|
|
g();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
// Test loop is unrolled for convergent function.
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4:[0-9]+]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK-LABEL: ret void
|
|
|
|
|
|
|
|
// Calls convfun() (declared convergent) in a loop with a constant trip count
// of 10. The FileCheck directives preceding this function expect ten
// back-to-back convfun() calls followed by the return, i.e. a fully unrolled
// loop.
void test_unroll() {
|
|
|
|
for (int i = 0; i < 10; i++)
|
|
|
|
convfun();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test loop is not unrolled for noduplicate function.
|
|
|
|
// CHECK-LABEL: define spir_func void @test_not_unroll()
|
|
|
|
// CHECK: br label %[[for_body:.+]]
|
|
|
|
// CHECK: [[for_cond_cleanup:.+]]:
|
|
|
|
// CHECK: ret void
|
|
|
|
// CHECK: [[for_body]]:
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK-NOT: call spir_func void @nodupfun()
|
2019-06-25 00:49:18 +08:00
|
|
|
|
|
|
|
// The new PM produces a slightly different IR for the loop from the legacy PM,
|
|
|
|
// but the test still checks that the loop is not unrolled.
|
|
|
|
// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
|
|
|
|
// CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]]
|
|
|
|
// CHECK-NEW: [[for_body_crit_edge]]:
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
// Calls nodupfun() (declared noduplicate) in a loop with a constant trip count
// of 10. The FileCheck directives preceding this function expect a single
// nodupfun() call inside a loop body that branches back, i.e. the loop is not
// unrolled.
void test_not_unroll() {
|
|
|
|
for (int i = 0; i < 10; i++)
|
|
|
|
nodupfun();
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
|
|
|
|
|
2017-11-14 06:40:55 +08:00
|
|
|
// CHECK-LABEL: @assume_convergent_asm
|
[OpenCL] Add '-cl-uniform-work-group-size' compile option
Summary:
OpenCL 2.0 specification defines '-cl-uniform-work-group-size' option,
which requires that the global work-size be a multiple of the work-group
size specified to clEnqueueNDRangeKernel and allows optimizations that
are made possible by this restriction.
The patch introduces the support of this option.
To keep information about whether an OpenCL kernel has uniform work
group size or not, clang generates 'uniform-work-group-size' function
attribute for every kernel:
- "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
- "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
'-cl-uniform-work-group-size' option was specified,
- "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
'-cl-uniform-work-group-size' options was specified.
If the function is not an OpenCL kernel, 'uniform-work-group-size'
attribute isn't generated.
Patch by: krisb
Reviewers: yaxunl, Anastasia, b-sumner
Reviewed By: yaxunl, Anastasia
Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits
Differential Revision: https://reviews.llvm.org/D43570
llvm-svn: 325771
2018-02-22 19:54:14 +08:00
|
|
|
// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
|
2017-11-14 06:40:55 +08:00
|
|
|
// Kernel whose body is a single volatile inline-asm statement, "s_barrier",
// with no operands. The FileCheck directive preceding this kernel expects the
// asm call to carry attribute group #5, which the attribute directives at the
// end of the file require to contain convergent.
kernel void assume_convergent_asm()
|
|
|
|
|
{
|
|
|
|
__asm__ volatile("s_barrier");
|
|
|
|
|
}
|
|
|
|
|
2019-07-09 00:24:10 +08:00
|
|
|
// CHECK: attributes #0 = { nofree noinline norecurse nounwind "
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
|
|
|
|
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
|
|
|
|
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
|
|
|
|
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
|
[OpenCL] Add '-cl-uniform-work-group-size' compile option
Summary:
OpenCL 2.0 specification defines '-cl-uniform-work-group-size' option,
which requires that the global work-size be a multiple of the work-group
size specified to clEnqueueNDRangeKernel and allows optimizations that
are made possible by this restriction.
The patch introduces the support of this option.
To keep information about whether an OpenCL kernel has uniform work
group size or not, clang generates 'uniform-work-group-size' function
attribute for every kernel:
- "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
- "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
'-cl-uniform-work-group-size' option was specified,
- "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
'-cl-uniform-work-group-size' options was specified.
If the function is not an OpenCL kernel, 'uniform-work-group-size'
attribute isn't generated.
Patch by: krisb
Reviewers: yaxunl, Anastasia, b-sumner
Reviewed By: yaxunl, Anastasia
Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits
Differential Revision: https://reviews.llvm.org/D43570
llvm-svn: 325771
2018-02-22 19:54:14 +08:00
|
|
|
// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
|
|
|
|
// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
|