forked from OSchip/llvm-project
[AMDGPU] Emit module flag for all code object versions
Reviewed by: Changpeng Fang, Matt Arsenault, Brian Sumner Differential Revision: https://reviews.llvm.org/D134355
This commit is contained in:
parent
37c6a25e9a
commit
5e25284dbc
|
@ -583,9 +583,8 @@ void CodeGenModule::Release() {
|
|||
}
|
||||
// Emit amdgpu_code_object_version module flag, which is code object version
|
||||
// times 100.
|
||||
// ToDo: Enable module flag for all code object version when ROCm device
|
||||
// library is ready.
|
||||
if (getTarget().getTargetOpts().CodeObjectVersion == TargetOptions::COV_5) {
|
||||
if (getTarget().getTargetOpts().CodeObjectVersion !=
|
||||
TargetOptions::COV_None) {
|
||||
getModule().addModuleFlag(llvm::Module::Error,
|
||||
"amdgpu_code_object_version",
|
||||
getTarget().getTargetOpts().CodeObjectVersion);
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
// Create module flag for code object version.
|
||||
|
||||
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
|
||||
// RUN: -o - %s | FileCheck %s -check-prefix=NONE
|
||||
// RUN: -o - %s | FileCheck %s -check-prefix=V4
|
||||
|
||||
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
|
||||
// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=NONE %s
|
||||
// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
|
||||
|
||||
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
|
||||
// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=NONE %s
|
||||
// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s
|
||||
|
||||
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
|
||||
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=NONE %s
|
||||
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
|
||||
|
||||
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
|
||||
// RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s
|
||||
|
@ -21,6 +21,9 @@
|
|||
// RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
|
||||
// RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV
|
||||
|
||||
// V2: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 200}
|
||||
// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300}
|
||||
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
|
||||
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
|
||||
// NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",
|
||||
// INV: error: invalid value '4.1' in '-mcode-object-version=4.1'
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
|
||||
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
|
||||
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
|
||||
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
|
||||
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber ![[MD:[0-9]+]]
|
||||
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
|
||||
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
|
||||
// OPT: ret void
|
||||
|
@ -30,7 +30,7 @@ __global__ void kernel1(int *x) {
|
|||
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
|
||||
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
|
||||
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
|
||||
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
|
||||
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber ![[MD]]
|
||||
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
|
||||
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
|
||||
// OPT: ret void
|
||||
|
@ -68,7 +68,7 @@ struct S {
|
|||
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1
|
||||
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
|
||||
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
|
||||
// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
|
||||
// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber ![[MD]]
|
||||
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
|
||||
// OPT: store i32 [[INC]], i32 addrspace(1)* [[G0]], align 4
|
||||
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
|
||||
|
@ -103,7 +103,7 @@ struct T {
|
|||
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1
|
||||
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
|
||||
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
|
||||
// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
|
||||
// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber ![[MD]]
|
||||
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
|
||||
// OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4
|
||||
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
|
||||
|
@ -130,7 +130,7 @@ struct SS {
|
|||
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float addrspace(1)*{{.*}} %a.coerce)
|
||||
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
|
||||
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
|
||||
// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber !2
|
||||
// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber ![[MD]]
|
||||
// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
|
||||
// OPT: store float [[INC]], float addrspace(1)* %a.coerce, align 4
|
||||
// OPT: ret void
|
||||
|
|
Loading…
Reference in New Issue