// llvm-project/clang/test/CodeGenOpenCL/amdgpu-attrs.cl

// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
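// The kernels below first exercise each attribute (and various combinations of
// attributes) with a value of 0: these must compile cleanly and must not
// produce the corresponding IR function attribute (see the CHECK-NOT lines at
// the end of the file). Kernels with nonzero values follow; their attribute
// strings are matched against the attribute sets checked at the bottom.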
__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0() {}
__attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics
kernel void waves_per_eu_0() {}
__attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
kernel void waves_per_eu_0_0() {}
__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
kernel void num_sgpr_0() {}
__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void num_vgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_num_sgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_num_vgpr_0() {}
__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
kernel void waves_per_eu_0_num_sgpr_0() {}
__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void waves_per_eu_0_num_vgpr_0() {}
__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
kernel void waves_per_eu_0_0_num_sgpr_0() {}
__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void waves_per_eu_0_0_num_vgpr_0() {}
__attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void num_sgpr_0_num_vgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {}
__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {}
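// From here on the attributes use nonzero values; the resulting IR attribute
// strings are matched by the CHECK-DAG lines at the end of the file.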
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
kernel void waves_per_eu_2() {
// CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
kernel void waves_per_eu_2_4() {
// CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]]
}
__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
kernel void num_sgpr_32() {
// CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void num_vgpr_64() {
// CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64_waves_per_eu_2() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64_waves_per_eu_2_4() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64_num_sgpr_32() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics
kernel void waves_per_eu_2_num_sgpr_32() {
// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void waves_per_eu_2_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics
kernel void waves_per_eu_2_4_num_sgpr_32() {
// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void waves_per_eu_2_4_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void num_sgpr_32_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
}
// Make sure this is silently accepted on other targets.
// X86-NOT: "amdgpu-flat-work-group-size"
// X86-NOT: "amdgpu-waves-per-eu"
// X86-NOT: "amdgpu-num-vgpr"
// X86-NOT: "amdgpu-num-sgpr"
// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0"
// CHECK-NOT: "amdgpu-waves-per-eu"="0"
// CHECK-NOT: "amdgpu-waves-per-eu"="0,0"
// CHECK-NOT: "amdgpu-num-sgpr"="0"
// CHECK-NOT: "amdgpu-num-vgpr"="0"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32"
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"