2016-09-26 09:02:57 +08:00
|
|
|
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
|
|
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0() {}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_0() {}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_0_0() {}
|
|
|
|
__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void num_sgpr0() {}
|
|
|
|
__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void num_vgpr0() {}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_num_sgpr_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_num_vgpr_0() {}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_0_num_sgpr_0() {}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_0_num_vgpr_0() {}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_0_0_num_sgpr_0() {}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_0_0_num_vgpr_0() {}
|
|
|
|
__attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void num_sgpr_0_num_vgpr_0() {}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_2() {
|
|
|
|
// CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_2_4() {
|
|
|
|
// CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
|
|
|
|
kernel void num_sgpr_32() {
|
|
|
|
// CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_4() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64_num_sgpr_32() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_2_num_sgpr_32() {
|
|
|
|
// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_2_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_2_4_num_sgpr_32() {
|
|
|
|
// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void waves_per_eu_2_4_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void num_sgpr_32_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
|
|
|
|
kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {
|
|
|
|
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure this is silently accepted on other targets.
|
|
|
|
// X86-NOT: "amdgpu-flat-work-group-size"
|
|
|
|
// X86-NOT: "amdgpu-waves-per-eu"
|
|
|
|
// X86-NOT: "amdgpu-num-vgpr"
|
|
|
|
// X86-NOT: "amdgpu-num-sgpr"
|
|
|
|
|
|
|
|
// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0"
|
|
|
|
// CHECK-NOT: "amdgpu-waves-per-eu"="0"
|
|
|
|
// CHECK-NOT: "amdgpu-waves-per-eu"="0,0"
|
|
|
|
// CHECK-NOT: "amdgpu-num-sgpr"="0"
|
|
|
|
// CHECK-NOT: "amdgpu-num-vgpr"="0"
|
|
|
|
|
Cleanup the handling of noinline function attributes, -fno-inline,
-fno-inline-functions, -O0, and optnone.
These were really, really tangled together:
- We used the noinline LLVM attribute for -fno-inline
- But not for -fno-inline-functions (breaking LTO)
- But we did use it for -finline-hint-functions (yay, LTO is happy!)
- But we didn't for -O0 (LTO is sad yet again...)
- We had weird structuring of CodeGenOpts with both an inlining
enumeration and a boolean. They interacted in weird ways and
needlessly.
- A *lot* of set smashing went on with setting these, and then got worse
when we considered optnone and other inlining-effecting attributes.
- A bunch of inline affecting attributes were managed in a completely
different place from -fno-inline.
- Even with -fno-inline we failed to put the LLVM noinline attribute
onto many generated function definitions because they didn't show up
as AST-level functions.
- If you passed -O0 but -finline-functions we would run the normal
inliner pass in LLVM despite it being in the O0 pipeline, which really
doesn't make much sense.
- Lastly, we used things like '-fno-inline' to manipulate the pass
pipeline which forced the pass pipeline to be much more
parameterizable than it really needs to be. Instead we can *just* use
the optimization level to select a pipeline and control the rest via
attributes.
Sadly, this causes a bunch of churn in tests because we don't run the
optimizer in the tests and check the contents of attribute sets. It
would be awesome if attribute sets were a bit more FileCheck friendly,
but oh well.
I think this is a significant improvement and should remove the semantic
need to change what inliner pass we run in order to comply with the
requested inlining semantics by relying completely on attributes. It
also cleans up tho optnone and related handling a bit.
One unfortunate aspect of this is that for generating alwaysinline
routines like those in OpenMP we end up removing noinline and then
adding alwaysinline. I tried a bunch of other approaches, but because we
recompute function attributes from scratch and don't have a declaration
here I couldn't find anything substantially cleaner than this.
Differential Revision: https://reviews.llvm.org/D28053
llvm-svn: 290398
2016-12-23 09:24:49 +08:00
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64"
|
|
|
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4"
|
|
|
|
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32"
|
|
|
|
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64"
|
2016-09-26 09:02:57 +08:00
|
|
|
|
Cleanup the handling of noinline function attributes, -fno-inline,
-fno-inline-functions, -O0, and optnone.
These were really, really tangled together:
- We used the noinline LLVM attribute for -fno-inline
- But not for -fno-inline-functions (breaking LTO)
- But we did use it for -finline-hint-functions (yay, LTO is happy!)
- But we didn't for -O0 (LTO is sad yet again...)
- We had weird structuring of CodeGenOpts with both an inlining
enumeration and a boolean. They interacted in weird ways and
needlessly.
- A *lot* of set smashing went on with setting these, and then got worse
when we considered optnone and other inlining-effecting attributes.
- A bunch of inline affecting attributes were managed in a completely
different place from -fno-inline.
- Even with -fno-inline we failed to put the LLVM noinline attribute
onto many generated function definitions because they didn't show up
as AST-level functions.
- If you passed -O0 but -finline-functions we would run the normal
inliner pass in LLVM despite it being in the O0 pipeline, which really
doesn't make much sense.
- Lastly, we used things like '-fno-inline' to manipulate the pass
pipeline which forced the pass pipeline to be much more
parameterizable than it really needs to be. Instead we can *just* use
the optimization level to select a pipeline and control the rest via
attributes.
Sadly, this causes a bunch of churn in tests because we don't run the
optimizer in the tests and check the contents of attribute sets. It
would be awesome if attribute sets were a bit more FileCheck friendly,
but oh well.
I think this is a significant improvement and should remove the semantic
need to change what inliner pass we run in order to comply with the
requested inlining semantics by relying completely on attributes. It
also cleans up tho optnone and related handling a bit.
One unfortunate aspect of this is that for generating alwaysinline
routines like those in OpenMP we end up removing noinline and then
adding alwaysinline. I tried a bunch of other approaches, but because we
recompute function attributes from scratch and don't have a declaration
here I couldn't find anything substantially cleaner than this.
Differential Revision: https://reviews.llvm.org/D28053
llvm-svn: 290398
2016-12-23 09:24:49 +08:00
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
|
|
|
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
|
|
|
|
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
|
|
|
// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
|
2016-09-26 09:02:57 +08:00
|
|
|
|
Cleanup the handling of noinline function attributes, -fno-inline,
-fno-inline-functions, -O0, and optnone.
These were really, really tangled together:
- We used the noinline LLVM attribute for -fno-inline
- But not for -fno-inline-functions (breaking LTO)
- But we did use it for -finline-hint-functions (yay, LTO is happy!)
- But we didn't for -O0 (LTO is sad yet again...)
- We had weird structuring of CodeGenOpts with both an inlining
enumeration and a boolean. They interacted in weird ways and
needlessly.
- A *lot* of set smashing went on with setting these, and then got worse
when we considered optnone and other inlining-effecting attributes.
- A bunch of inline affecting attributes were managed in a completely
different place from -fno-inline.
- Even with -fno-inline we failed to put the LLVM noinline attribute
onto many generated function definitions because they didn't show up
as AST-level functions.
- If you passed -O0 but -finline-functions we would run the normal
inliner pass in LLVM despite it being in the O0 pipeline, which really
doesn't make much sense.
- Lastly, we used things like '-fno-inline' to manipulate the pass
pipeline which forced the pass pipeline to be much more
parameterizable than it really needs to be. Instead we can *just* use
the optimization level to select a pipeline and control the rest via
attributes.
Sadly, this causes a bunch of churn in tests because we don't run the
optimizer in the tests and check the contents of attribute sets. It
would be awesome if attribute sets were a bit more FileCheck friendly,
but oh well.
I think this is a significant improvement and should remove the semantic
need to change what inliner pass we run in order to comply with the
requested inlining semantics by relying completely on attributes. It
also cleans up tho optnone and related handling a bit.
One unfortunate aspect of this is that for generating alwaysinline
routines like those in OpenMP we end up removing noinline and then
adding alwaysinline. I tried a bunch of other approaches, but because we
recompute function attributes from scratch and don't have a declaration
here I couldn't find anything substantially cleaner than this.
Differential Revision: https://reviews.llvm.org/D28053
llvm-svn: 290398
2016-12-23 09:24:49 +08:00
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|
2016-09-26 09:02:57 +08:00
|
|
|
|
Cleanup the handling of noinline function attributes, -fno-inline,
-fno-inline-functions, -O0, and optnone.
These were really, really tangled together:
- We used the noinline LLVM attribute for -fno-inline
- But not for -fno-inline-functions (breaking LTO)
- But we did use it for -finline-hint-functions (yay, LTO is happy!)
- But we didn't for -O0 (LTO is sad yet again...)
- We had weird structuring of CodeGenOpts with both an inlining
enumeration and a boolean. They interacted in weird ways and
needlessly.
- A *lot* of set smashing went on with setting these, and then got worse
when we considered optnone and other inlining-effecting attributes.
- A bunch of inline affecting attributes were managed in a completely
different place from -fno-inline.
- Even with -fno-inline we failed to put the LLVM noinline attribute
onto many generated function definitions because they didn't show up
as AST-level functions.
- If you passed -O0 but -finline-functions we would run the normal
inliner pass in LLVM despite it being in the O0 pipeline, which really
doesn't make much sense.
- Lastly, we used things like '-fno-inline' to manipulate the pass
pipeline which forced the pass pipeline to be much more
parameterizable than it really needs to be. Instead we can *just* use
the optimization level to select a pipeline and control the rest via
attributes.
Sadly, this causes a bunch of churn in tests because we don't run the
optimizer in the tests and check the contents of attribute sets. It
would be awesome if attribute sets were a bit more FileCheck friendly,
but oh well.
I think this is a significant improvement and should remove the semantic
need to change what inliner pass we run in order to comply with the
requested inlining semantics by relying completely on attributes. It
also cleans up tho optnone and related handling a bit.
One unfortunate aspect of this is that for generating alwaysinline
routines like those in OpenMP we end up removing noinline and then
adding alwaysinline. I tried a bunch of other approaches, but because we
recompute function attributes from scratch and don't have a declaration
here I couldn't find anything substantially cleaner than this.
Differential Revision: https://reviews.llvm.org/D28053
llvm-svn: 290398
2016-12-23 09:24:49 +08:00
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
|
|
|
|
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
|