forked from OSchip/llvm-project
[CUDA][HIP] Do not treat host var address as constant in device compilation
Currently clang treats host var address as constant in device compilation, which causes const vars initialized with host var address promoted to device variables incorrectly and results in undefined symbols. This patch fixes that. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D118153 Fixes: SWDEV-309881 Change-Id: I0a69357063c6f8539ef259c96c250d04615f4473
This commit is contained in:
parent
15a3476f3f
commit
8428c75da1
|
@ -653,6 +653,20 @@ public:
|
||||||
/// Returns the clang bytecode interpreter context.
|
/// Returns the clang bytecode interpreter context.
|
||||||
interp::Context &getInterpContext();
|
interp::Context &getInterpContext();
|
||||||
|
|
||||||
|
struct CUDAConstantEvalContext {
|
||||||
|
/// Do not allow wrong-sided variables in constant expressions.
|
||||||
|
bool NoWrongSidedVars = false;
|
||||||
|
} CUDAConstantEvalCtx;
|
||||||
|
struct CUDAConstantEvalContextRAII {
|
||||||
|
ASTContext &Ctx;
|
||||||
|
CUDAConstantEvalContext SavedCtx;
|
||||||
|
CUDAConstantEvalContextRAII(ASTContext &Ctx_, bool NoWrongSidedVars)
|
||||||
|
: Ctx(Ctx_), SavedCtx(Ctx_.CUDAConstantEvalCtx) {
|
||||||
|
Ctx_.CUDAConstantEvalCtx.NoWrongSidedVars = NoWrongSidedVars;
|
||||||
|
}
|
||||||
|
~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
|
||||||
|
};
|
||||||
|
|
||||||
/// Returns the dynamic AST node parent map context.
|
/// Returns the dynamic AST node parent map context.
|
||||||
ParentMapContext &getParentMapContext();
|
ParentMapContext &getParentMapContext();
|
||||||
|
|
||||||
|
|
|
@ -983,6 +983,8 @@ namespace {
|
||||||
discardCleanups();
|
discardCleanups();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASTContext &getCtx() const override { return Ctx; }
|
||||||
|
|
||||||
void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value,
|
void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value,
|
||||||
EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) {
|
EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) {
|
||||||
EvaluatingDecl = Base;
|
EvaluatingDecl = Base;
|
||||||
|
@ -1116,8 +1118,6 @@ namespace {
|
||||||
|
|
||||||
Expr::EvalStatus &getEvalStatus() const override { return EvalStatus; }
|
Expr::EvalStatus &getEvalStatus() const override { return EvalStatus; }
|
||||||
|
|
||||||
ASTContext &getCtx() const override { return Ctx; }
|
|
||||||
|
|
||||||
// If we have a prior diagnostic, it will be noting that the expression
|
// If we have a prior diagnostic, it will be noting that the expression
|
||||||
// isn't a constant expression. This diagnostic is more important,
|
// isn't a constant expression. This diagnostic is more important,
|
||||||
// unless we require this evaluation to produce a constant expression.
|
// unless we require this evaluation to produce a constant expression.
|
||||||
|
@ -2216,6 +2216,19 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc,
|
||||||
if (!isForManglingOnly(Kind) && Var->hasAttr<DLLImportAttr>())
|
if (!isForManglingOnly(Kind) && Var->hasAttr<DLLImportAttr>())
|
||||||
// FIXME: Diagnostic!
|
// FIXME: Diagnostic!
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// In CUDA/HIP device compilation, only device side variables have
|
||||||
|
// constant addresses.
|
||||||
|
if (Info.getCtx().getLangOpts().CUDA &&
|
||||||
|
Info.getCtx().getLangOpts().CUDAIsDevice &&
|
||||||
|
Info.getCtx().CUDAConstantEvalCtx.NoWrongSidedVars) {
|
||||||
|
if ((!Var->hasAttr<CUDADeviceAttr>() &&
|
||||||
|
!Var->hasAttr<CUDAConstantAttr>() &&
|
||||||
|
!Var->getType()->isCUDADeviceBuiltinSurfaceType() &&
|
||||||
|
!Var->getType()->isCUDADeviceBuiltinTextureType()) ||
|
||||||
|
Var->hasAttr<HIPManagedAttr>())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (const auto *FD = dyn_cast<const FunctionDecl>(BaseVD)) {
|
if (const auto *FD = dyn_cast<const FunctionDecl>(BaseVD)) {
|
||||||
// __declspec(dllimport) must be handled very carefully:
|
// __declspec(dllimport) must be handled very carefully:
|
||||||
|
|
|
@ -590,6 +590,8 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
|
||||||
};
|
};
|
||||||
auto IsConstantInit = [&](const Expr *Init) {
|
auto IsConstantInit = [&](const Expr *Init) {
|
||||||
assert(Init);
|
assert(Init);
|
||||||
|
ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context,
|
||||||
|
/*NoWronSidedVars=*/true);
|
||||||
return Init->isConstantInitializer(S.Context,
|
return Init->isConstantInitializer(S.Context,
|
||||||
VD->getType()->isReferenceType());
|
VD->getType()->isReferenceType());
|
||||||
};
|
};
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
// REQUIRES: amdgpu-registered-target
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
|
||||||
|
// RUN: -emit-llvm -o - | FileCheck -check-prefix=DEV %s
|
||||||
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
|
||||||
|
// RUN: -emit-llvm -o - | FileCheck -check-prefix=HOST %s
|
||||||
|
|
||||||
|
// Negative tests.
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
|
||||||
|
// RUN: -emit-llvm -o - | FileCheck -check-prefix=DEV-NEG %s
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
// Test const var initialized with address of a const var.
|
||||||
|
// Both are promoted to device side.
|
||||||
|
|
||||||
|
// DEV-DAG: @_ZN5Test1L1aE = internal addrspace(4) constant i32 1
|
||||||
|
// DEV-DAG: @_ZN5Test11B2p1E = addrspace(4) externally_initialized constant i32* addrspacecast (i32 addrspace(4)* @_ZN5Test1L1aE to i32*)
|
||||||
|
// DEV-DAG: @_ZN5Test11B2p2E = addrspace(4) externally_initialized constant i32* addrspacecast (i32 addrspace(4)* @_ZN5Test1L1aE to i32*)
|
||||||
|
// DEV-DAG: @_ZN5Test12b2E = addrspace(1) externally_initialized global i32 1
|
||||||
|
// HOST-DAG: @_ZN5Test1L1aE = internal constant i32 1
|
||||||
|
// HOST-DAG: @_ZN5Test11B2p1E = constant i32* @_ZN5Test1L1aE
|
||||||
|
// HOST-DAG: @_ZN5Test11B2p2E = internal constant i32* undef
|
||||||
|
// HOST-DAG: @_ZN5Test12b1E = global i32 1
|
||||||
|
// HOST-DAG: @_ZN5Test12b2E = internal global i32 undef
|
||||||
|
namespace Test1 {
|
||||||
|
const int a = 1;
|
||||||
|
|
||||||
|
struct B {
|
||||||
|
static const int *const p1;
|
||||||
|
static __device__ const int *const p2;
|
||||||
|
};
|
||||||
|
const int *const B::p1 = &a;
|
||||||
|
__device__ const int *const B::p2 = &a;
|
||||||
|
int b1 = B::p1 == B::p2;
|
||||||
|
__device__ int b2 = B::p1 == B::p2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test const var initialized with address of a non-cost var.
|
||||||
|
// Neither is promoted to device side.
|
||||||
|
|
||||||
|
// DEV-NEG-NOT: @_ZN5Test2L1aE
|
||||||
|
// DEV-NEG-NOT: @_ZN5Test21B1pE
|
||||||
|
// HOST-DAG: @_ZN5Test21aE = global i32 1
|
||||||
|
// HOST-DAG: @_ZN5Test21B1pE = constant i32* @_ZN5Test21aE
|
||||||
|
|
||||||
|
namespace Test2 {
|
||||||
|
int a = 1;
|
||||||
|
|
||||||
|
struct B {
|
||||||
|
static int *const p;
|
||||||
|
};
|
||||||
|
int *const B::p = &a;
|
||||||
|
}
|
|
@ -0,0 +1,111 @@
|
||||||
|
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
|
||||||
|
// RUN: -fsyntax-only -verify
|
||||||
|
// RUN: %clang_cc1 -triple x86_64 -x hip %s \
|
||||||
|
// RUN: -fsyntax-only -verify=host
|
||||||
|
|
||||||
|
// host-no-diagnostics
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
// Test const var initialized with address of a const var.
|
||||||
|
// Both are promoted to device side.
|
||||||
|
|
||||||
|
namespace Test1 {
|
||||||
|
const int a = 1;
|
||||||
|
|
||||||
|
struct B {
|
||||||
|
static const int *const p;
|
||||||
|
__device__ static const int *const p2;
|
||||||
|
};
|
||||||
|
const int *const B::p = &a;
|
||||||
|
// Const variable 'a' is treated as __constant__ on device side,
|
||||||
|
// therefore its address can be used as initializer for another
|
||||||
|
// device variable.
|
||||||
|
__device__ const int *const B::p2 = &a;
|
||||||
|
|
||||||
|
__device__ void f() {
|
||||||
|
int y = a;
|
||||||
|
const int *x = B::p;
|
||||||
|
const int *z = B::p2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test const var initialized with address of a non-cost var.
|
||||||
|
// Neither is promoted to device side.
|
||||||
|
|
||||||
|
namespace Test2 {
|
||||||
|
int a = 1;
|
||||||
|
// expected-note@-1{{host variable declared here}}
|
||||||
|
|
||||||
|
struct B {
|
||||||
|
static int *const p;
|
||||||
|
};
|
||||||
|
int *const B::p = &a;
|
||||||
|
// expected-note@-1{{const variable cannot be emitted on device side due to dynamic initialization}}
|
||||||
|
|
||||||
|
__device__ void f() {
|
||||||
|
int y = a;
|
||||||
|
// expected-error@-1{{reference to __host__ variable 'a' in __device__ function}}
|
||||||
|
const int *x = B::p;
|
||||||
|
// expected-error@-1{{reference to __host__ variable 'p' in __device__ function}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test device var initialized with address of a non-const host var, __shared var,
|
||||||
|
// __managed__ var, __device__ var, __constant__ var, texture var, surface var.
|
||||||
|
|
||||||
|
namespace Test3 {
|
||||||
|
struct textureReference {
|
||||||
|
int desc;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum ReadMode {
|
||||||
|
ElementType = 0,
|
||||||
|
NormalizedFloat = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, int dim = 1, enum ReadMode mode = ElementType>
|
||||||
|
struct __attribute__((device_builtin_texture_type)) texture : public textureReference {
|
||||||
|
};
|
||||||
|
|
||||||
|
struct surfaceReference {
|
||||||
|
int desc;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, int dim = 1>
|
||||||
|
struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {
|
||||||
|
};
|
||||||
|
|
||||||
|
// Partial specialization over `void`.
|
||||||
|
template<int dim>
|
||||||
|
struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference {
|
||||||
|
};
|
||||||
|
|
||||||
|
texture<float, 2, ElementType> tex;
|
||||||
|
surface<void, 2> surf;
|
||||||
|
|
||||||
|
int a = 1;
|
||||||
|
__shared__ int b;
|
||||||
|
__managed__ int c = 1;
|
||||||
|
__device__ int d = 1;
|
||||||
|
__constant__ int e = 1;
|
||||||
|
struct B {
|
||||||
|
__device__ static int *const p1;
|
||||||
|
__device__ static int *const p2;
|
||||||
|
__device__ static int *const p3;
|
||||||
|
__device__ static int *const p4;
|
||||||
|
__device__ static int *const p5;
|
||||||
|
__device__ static texture<float, 2, ElementType> *const p6;
|
||||||
|
__device__ static surface<void, 2> *const p7;
|
||||||
|
};
|
||||||
|
__device__ int *const B::p1 = &a;
|
||||||
|
// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
|
||||||
|
__device__ int *const B::p2 = &b;
|
||||||
|
// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
|
||||||
|
__device__ int *const B::p3 = &c;
|
||||||
|
// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
|
||||||
|
__device__ int *const B::p4 = &d;
|
||||||
|
__device__ int *const B::p5 = &e;
|
||||||
|
__device__ texture<float, 2, ElementType> *const B::p6 = &tex;
|
||||||
|
__device__ surface<void, 2> *const B::p7 = &surf;
|
||||||
|
}
|
Loading…
Reference in New Issue