2020-11-11 23:43:00 +08:00
|
|
|
// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s
|
|
|
|
// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s
|
2017-10-07 03:34:40 +08:00
|
|
|
|
|
|
|
// This is initially assumed convergent, but can be deduced to not require it.
|
|
|
|
|
2021-07-09 19:13:34 +08:00
|
|
|
// CHECK-LABEL: define{{.*}} spir_func void @non_convfun(i32* %p) local_unnamed_addr #0
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: ret void
|
|
|
|
__attribute__((noinline))
|
2021-07-09 19:13:34 +08:00
|
|
|
void non_convfun(volatile int* p) {
|
2017-10-07 03:34:40 +08:00
|
|
|
*p = 0;
|
|
|
|
}
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
void convfun(void) __attribute__((convergent));
|
|
|
|
void nodupfun(void) __attribute__((noduplicate));
|
|
|
|
|
2017-10-07 03:34:40 +08:00
|
|
|
// External functions should be assumed convergent.
|
2016-11-02 02:45:32 +08:00
|
|
|
void f(void);
|
|
|
|
void g(void);
|
|
|
|
|
|
|
|
// Test two if's are merged and non_convfun duplicated.
|
|
|
|
// The LLVM IR is equivalent to:
|
|
|
|
// if (a) {
|
|
|
|
// f();
|
|
|
|
// non_convfun();
|
|
|
|
// g();
|
|
|
|
// } else {
|
|
|
|
// non_convfun();
|
|
|
|
// }
|
|
|
|
//
|
2021-07-09 19:13:34 +08:00
|
|
|
// CHECK-LABEL: define{{.*}} spir_func void @test_merge_if(i32 %a, i32* %p) local_unnamed_addr #1 {
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_then]]:
|
|
|
|
// CHECK: tail call spir_func void @f()
|
2021-07-09 19:13:34 +08:00
|
|
|
// CHECK: tail call spir_func void @non_convfun(i32* %p)
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: tail call spir_func void @g()
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br label %[[if_end3:.+]]
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_end3_critedge]]:
|
2021-07-09 19:13:34 +08:00
|
|
|
// CHECK: tail call spir_func void @non_convfun(i32* %p)
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br label %[[if_end3]]
|
2017-10-07 03:34:40 +08:00
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_end3]]:
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: ret void
|
2016-11-02 02:45:32 +08:00
|
|
|
|
2021-07-09 19:13:34 +08:00
|
|
|
void test_merge_if(int a, volatile int* p) {
|
2016-11-02 02:45:32 +08:00
|
|
|
if (a) {
|
|
|
|
f();
|
|
|
|
}
|
2021-07-09 19:13:34 +08:00
|
|
|
non_convfun(p);
|
2016-11-02 02:45:32 +08:00
|
|
|
if (a) {
|
|
|
|
g();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
|
|
|
|
// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
|
|
|
|
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
// Test two if's are not merged.
|
2020-12-31 16:27:11 +08:00
|
|
|
// CHECK-LABEL: define{{.*}} spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
|
|
|
|
// CHECK: [[if_then]]:
|
|
|
|
// CHECK: tail call spir_func void @f()
|
|
|
|
// CHECK-NOT: call spir_func void @convfun()
|
|
|
|
// CHECK-NOT: call spir_func void @g()
|
|
|
|
// CHECK: br label %[[if_end]]
|
|
|
|
// CHECK: [[if_end]]:
|
2020-07-22 14:14:50 +08:00
|
|
|
// CHECK-NOT: phi i1
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4:.+]]
|
2020-07-22 14:14:50 +08:00
|
|
|
// CHECK: br i1 %[[tobool]], label %[[if_end3:.+]], label %[[if_then2:.+]]
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: [[if_then2]]:
|
|
|
|
// CHECK: tail call spir_func void @g()
|
|
|
|
// CHECK: br label %[[if_end3:.+]]
|
|
|
|
// CHECK: [[if_end3]]:
|
|
|
|
// CHECK-LABEL: ret void
|
|
|
|
|
|
|
|
void test_no_merge_if(int a) {
|
|
|
|
if (a) {
|
|
|
|
f();
|
|
|
|
}
|
|
|
|
convfun();
|
|
|
|
if(a) {
|
|
|
|
g();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
// Test loop is unrolled for convergent function.
|
2020-12-31 16:27:11 +08:00
|
|
|
// CHECK-LABEL: define{{.*}} spir_func void @test_unroll() local_unnamed_addr #1
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4:[0-9]+]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
|
|
|
// CHECK: tail call spir_func void @convfun() #[[attr4]]
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK-LABEL: ret void
|
|
|
|
|
|
|
|
void test_unroll() {
|
|
|
|
for (int i = 0; i < 10; i++)
|
|
|
|
convfun();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test loop is not unrolled for noduplicate function.
|
2020-12-31 16:27:11 +08:00
|
|
|
// CHECK-LABEL: define{{.*}} spir_func void @test_not_unroll()
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK: br label %[[for_body:.+]]
|
|
|
|
// CHECK: [[for_cond_cleanup:.+]]:
|
|
|
|
// CHECK: ret void
|
|
|
|
// CHECK: [[for_body]]:
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
|
2016-11-02 02:45:32 +08:00
|
|
|
// CHECK-NOT: call spir_func void @nodupfun()
|
2020-11-11 23:43:00 +08:00
|
|
|
// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
|
2016-11-02 02:45:32 +08:00
|
|
|
|
|
|
|
void test_not_unroll() {
|
|
|
|
for (int i = 0; i < 10; i++)
|
|
|
|
nodupfun();
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
|
|
|
|
|
2017-11-14 06:40:55 +08:00
|
|
|
// CHECK-LABEL: @assume_convergent_asm
|
[OpenCL] Add '-cl-uniform-work-group-size' compile option
Summary:
OpenCL 2.0 specification defines '-cl-uniform-work-group-size' option,
which requires that the global work-size be a multiple of the work-group
size specified to clEnqueueNDRangeKernel and allows optimizations that
are made possible by this restriction.
The patch introduces the support of this option.
To keep information about whether an OpenCL kernel has uniform work
group size or not, clang generates 'uniform-work-group-size' function
attribute for every kernel:
- "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
- "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
'-cl-uniform-work-group-size' option was specified,
- "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
'-cl-uniform-work-group-size' options was specified.
If the function is not an OpenCL kernel, 'uniform-work-group-size'
attribute isn't generated.
Patch by: krisb
Reviewers: yaxunl, Anastasia, b-sumner
Reviewed By: yaxunl, Anastasia
Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits
Differential Revision: https://reviews.llvm.org/D43570
llvm-svn: 325771
2018-02-22 19:54:14 +08:00
|
|
|
// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
|
2017-11-14 06:40:55 +08:00
|
|
|
kernel void assume_convergent_asm()
|
|
|
|
{
|
|
|
|
__asm__ volatile("s_barrier");
|
|
|
|
}
|
|
|
|
|
[funcattrs] Add the maximal set of implied attributes to definitions
Have funcattrs expand all implied attributes into the IR. This expands the infrastructure from D100400, but for definitions not declarations this time.
Somewhat subtly, this mostly isn't semantic. Because the accessors did the inference, any client which used the accessor was already getting the stronger result. Clients that directly checked presence of attributes (there are some), will see a stronger result now.
The old behavior can end up quite confusing for two reasons:
* Without this change, we have situations where function-attrs appears to fail when inferring an attribute (as seen by a human reading IR), but that consuming code will see that it should have been implied. As a human trying to sanity check test results and study IR for optimization possibilities, this is exceeding error prone and confusing. (I'll note that I wasted several hours recently because of this.)
* We can have transforms which trigger without the IR appearing (on inspection) to meet the preconditions. This change doesn't prevent this from happening (as the accessors still involve multiple checks), but it should make it less frequent.
I'd argue in favor of deleting the extra checks out of the accessors after this lands, but I want that in it's own review as a) it's purely stylistic, and b) I already know there's some disagreement.
Once this lands, I'm also going to do a cleanup change which will delete some now redundant duplicate predicates in the inference code, but again, that deserves to be a change of it's own.
Differential Revision: https://reviews.llvm.org/D100226
2021-04-17 05:03:36 +08:00
|
|
|
// CHECK: attributes #0 = { nofree noinline norecurse nounwind willreturn mustprogress "
|
2017-10-07 03:34:40 +08:00
|
|
|
// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
|
|
|
|
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
|
|
|
|
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
|
|
|
|
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
|
[OpenCL] Add '-cl-uniform-work-group-size' compile option
Summary:
OpenCL 2.0 specification defines '-cl-uniform-work-group-size' option,
which requires that the global work-size be a multiple of the work-group
size specified to clEnqueueNDRangeKernel and allows optimizations that
are made possible by this restriction.
The patch introduces the support of this option.
To keep information about whether an OpenCL kernel has uniform work
group size or not, clang generates 'uniform-work-group-size' function
attribute for every kernel:
- "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
- "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
'-cl-uniform-work-group-size' option was specified,
- "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
'-cl-uniform-work-group-size' options was specified.
If the function is not an OpenCL kernel, 'uniform-work-group-size'
attribute isn't generated.
Patch by: krisb
Reviewers: yaxunl, Anastasia, b-sumner
Reviewed By: yaxunl, Anastasia
Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits
Differential Revision: https://reviews.llvm.org/D43570
llvm-svn: 325771
2018-02-22 19:54:14 +08:00
|
|
|
// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
|
|
|
|
// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
|