[OpenCL] Add global_device and global_host address spaces

This patch introduces two new OpenCL address spaces, global_device and global_host,
which are subsets of the global address space. The resulting address space hierarchy
looks like this:

```
generic->global->host
               ->device
       ->private
       ->local
constant
```

Justification: USM allocations may be associated with both host and device memory. We
want to give users a way to tell the compiler the allocation type of a USM pointer for
optimization purposes. (Link to the Unified Shared Memory extension:
https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc)

Before this patch, a USM pointer could only be in the opencl_global
address space, so a device backend could not tell whether a particular pointer
points to host or device memory. On FPGAs, at least, we can generate more
efficient hardware if the user tells us where the pointer can point:
being able to distinguish between these types of pointers at compile time
allows us to instantiate simpler load-store units to perform memory
transactions.

Patch by Dmitry Sidorov.

Reviewed By: Anastasia

Differential Revision: https://reviews.llvm.org/D82174
This commit is contained in:
Alexey Bader 2020-07-29 15:07:06 +03:00
parent 2c662f3d3d
commit 8d27be8dba
22 changed files with 262 additions and 29 deletions

View File

@ -480,6 +480,11 @@ public:
// Otherwise in OpenCLC v2.0 s6.5.5: every address space except
// for __constant can be used as __generic.
(A == LangAS::opencl_generic && B != LangAS::opencl_constant) ||
// We also define the global_device and global_host address spaces, which
// are subsets of __global, to distinguish global pointers allocated on
// the host from those allocated on the device.
(A == LangAS::opencl_global && (B == LangAS::opencl_global_device ||
B == LangAS::opencl_global_host)) ||
// Consider pointer size address spaces to be equivalent to default.
((isPtrSizeAddressSpace(A) || A == LangAS::Default) &&
(isPtrSizeAddressSpace(B) || B == LangAS::Default));

View File

@ -36,6 +36,8 @@ enum class LangAS : unsigned {
opencl_constant,
opencl_private,
opencl_generic,
opencl_global_device,
opencl_global_host,
// CUDA specific address spaces.
cuda_device,

View File

@ -1178,6 +1178,16 @@ def OpenCLGlobalAddressSpace : TypeAttr {
let Documentation = [OpenCLAddressSpaceGlobalDocs];
}
// Type attribute selecting the OpenCL global_device address space.
// Only the Clang<> spelling "opencl_global_device" is listed; unlike
// OpenCLLocalAddressSpace there is no keyword spelling. The documentation
// entry is shared with OpenCLGlobalHostAddressSpace.
def OpenCLGlobalDeviceAddressSpace : TypeAttr {
let Spellings = [Clang<"opencl_global_device">];
let Documentation = [OpenCLAddressSpaceGlobalExtDocs];
}
// Type attribute selecting the OpenCL global_host address space.
// Only the Clang<> spelling "opencl_global_host" is listed; unlike
// OpenCLLocalAddressSpace there is no keyword spelling. The documentation
// entry is shared with OpenCLGlobalDeviceAddressSpace.
def OpenCLGlobalHostAddressSpace : TypeAttr {
let Spellings = [Clang<"opencl_global_host">];
let Documentation = [OpenCLAddressSpaceGlobalExtDocs];
}
def OpenCLLocalAddressSpace : TypeAttr {
let Spellings = [Keyword<"__local">, Keyword<"local">, Clang<"opencl_local">];
let Documentation = [OpenCLAddressSpaceLocalDocs];

View File

@ -3123,6 +3123,30 @@ scope) variables and static local variable as well.
}];
}
def OpenCLAddressSpaceGlobalExtDocs : Documentation {
let Category = DocOpenCLAddressSpaces;
let Heading = "[[clang::opencl_global_device]], [[clang::opencl_global_host]]";
let Content = [{
The ``global_device`` and ``global_host`` address space attributes specify that
an object is allocated in global memory on the device/host. They help to
distinguish USM (Unified Shared Memory) pointers that access global device
memory from those that access global host memory. These new address spaces are
subsets of the ``__global/opencl_global`` address space. The full address space
model for OpenCL 2.0 with the extension looks as follows:

  | generic->global->host
  |                ->device
  |        ->private
  |        ->local
  | constant

As ``global_device`` and ``global_host`` are subsets of the
``__global/opencl_global`` address space, it is allowed to convert the
``global_device`` and ``global_host`` address spaces to the
``__global/opencl_global`` address space (following ISO/IEC TR 18037 5.1.3
"Address space nesting and rules for pointers").
}];
}
def OpenCLAddressSpaceLocalDocs : Documentation {
let Category = DocOpenCLAddressSpaces;
let Heading = "__local, local, [[clang::opencl_local]]";

View File

@ -606,6 +606,10 @@ public:
return LangAS::opencl_constant;
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
return LangAS::opencl_global;
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
return LangAS::opencl_global_device;
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
return LangAS::opencl_global_host;
case ParsedAttr::AT_OpenCLLocalAddressSpace:
return LangAS::opencl_local;
case ParsedAttr::AT_OpenCLPrivateAddressSpace:

View File

@ -919,18 +919,20 @@ static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
// The fake address space map must have a distinct entry for each
// language-specific address space.
static const unsigned FakeAddrSpaceMap[] = {
0, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // cuda_device
6, // cuda_constant
7, // cuda_shared
8, // ptr32_sptr
9, // ptr32_uptr
10 // ptr64
0, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // opencl_global_device
6, // opencl_global_host
7, // cuda_device
8, // cuda_constant
9, // cuda_shared
10, // ptr32_sptr
11, // ptr32_uptr
12 // ptr64
};
return &FakeAddrSpaceMap;
} else {

View File

@ -2388,16 +2388,39 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals, const DependentAddressSp
switch (AS) {
default: llvm_unreachable("Not a language specific address space");
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
// "private"| "generic" ]
case LangAS::opencl_global: ASString = "CLglobal"; break;
case LangAS::opencl_local: ASString = "CLlocal"; break;
case LangAS::opencl_constant: ASString = "CLconstant"; break;
case LangAS::opencl_private: ASString = "CLprivate"; break;
case LangAS::opencl_generic: ASString = "CLgeneric"; break;
// "private"| "generic" | "device" |
// "host" ]
case LangAS::opencl_global:
ASString = "CLglobal";
break;
case LangAS::opencl_global_device:
ASString = "CLdevice";
break;
case LangAS::opencl_global_host:
ASString = "CLhost";
break;
case LangAS::opencl_local:
ASString = "CLlocal";
break;
case LangAS::opencl_constant:
ASString = "CLconstant";
break;
case LangAS::opencl_private:
ASString = "CLprivate";
break;
case LangAS::opencl_generic:
ASString = "CLgeneric";
break;
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
case LangAS::cuda_device: ASString = "CUdevice"; break;
case LangAS::cuda_constant: ASString = "CUconstant"; break;
case LangAS::cuda_shared: ASString = "CUshared"; break;
case LangAS::cuda_device:
ASString = "CUdevice";
break;
case LangAS::cuda_constant:
ASString = "CUconstant";
break;
case LangAS::cuda_shared:
ASString = "CUshared";
break;
// <ptrsize-addrspace> ::= [ "ptr32_sptr" | "ptr32_uptr" | "ptr64" ]
case LangAS::ptr32_sptr:
ASString = "ptr32_sptr";

View File

@ -1798,7 +1798,7 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
// where:
// <language_addr_space> ::= <OpenCL-addrspace> | <CUDA-addrspace>
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
// "private"| "generic" ]
// "private"| "generic" | "device" | "host" ]
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
// Note that the above were chosen to match the Itanium mangling for this.
//
@ -1823,6 +1823,12 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
case LangAS::opencl_global:
Extra.mangleSourceName("_ASCLglobal");
break;
case LangAS::opencl_global_device:
Extra.mangleSourceName("_ASCLdevice");
break;
case LangAS::opencl_global_host:
Extra.mangleSourceName("_ASCLhost");
break;
case LangAS::opencl_local:
Extra.mangleSourceName("_ASCLlocal");
break;

View File

@ -1564,6 +1564,8 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
case attr::OpenCLPrivateAddressSpace:
case attr::OpenCLGlobalAddressSpace:
case attr::OpenCLGlobalDeviceAddressSpace:
case attr::OpenCLGlobalHostAddressSpace:
case attr::OpenCLLocalAddressSpace:
case attr::OpenCLConstantAddressSpace:
case attr::OpenCLGenericAddressSpace:
@ -1866,6 +1868,10 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) {
return "__constant";
case LangAS::opencl_generic:
return "__generic";
case LangAS::opencl_global_device:
return "__global_device";
case LangAS::opencl_global_host:
return "__global_host";
case LangAS::cuda_device:
return "__device__";
case LangAS::cuda_constant:

View File

@ -46,6 +46,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
Constant, // opencl_constant
Private, // opencl_private
Generic, // opencl_generic
Global, // opencl_global_device
Global, // opencl_global_host
Global, // cuda_device
Constant, // cuda_constant
Local, // cuda_shared
@ -61,6 +63,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
Constant, // opencl_constant
Private, // opencl_private
Generic, // opencl_generic
Global, // opencl_global_device
Global, // opencl_global_host
Global, // cuda_device
Constant, // cuda_constant
Local, // cuda_shared

View File

@ -30,6 +30,8 @@ static const unsigned NVPTXAddrSpaceMap[] = {
0, // opencl_private
// FIXME: generic has to be added to the target
0, // opencl_generic
1, // opencl_global_device
1, // opencl_global_host
1, // cuda_device
4, // cuda_constant
3, // cuda_shared

View File

@ -28,6 +28,8 @@ static const unsigned SPIRAddrSpaceMap[] = {
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // opencl_global_device
6, // opencl_global_host
0, // cuda_device
0, // cuda_constant
0, // cuda_shared

View File

@ -35,6 +35,8 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
4, // opencl_local
5, // opencl_constant
0, // opencl_private
1, // opencl_global_device
1, // opencl_global_host
// FIXME: generic has to be added to the target
0, // opencl_generic
0, // cuda_device

View File

@ -30,6 +30,8 @@ static const unsigned X86AddrSpaceMap[] = {
0, // opencl_constant
0, // opencl_private
0, // opencl_generic
0, // opencl_global_device
0, // opencl_global_host
0, // cuda_device
0, // cuda_constant
0, // cuda_shared

View File

@ -1324,10 +1324,18 @@ static void removeImageAccessQualifier(std::string& TyName) {
// (basically all single AS CPUs).
static unsigned ArgInfoAddressSpace(LangAS AS) {
switch (AS) {
case LangAS::opencl_global: return 1;
case LangAS::opencl_constant: return 2;
case LangAS::opencl_local: return 3;
case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs.
case LangAS::opencl_global:
return 1;
case LangAS::opencl_constant:
return 2;
case LangAS::opencl_local:
return 3;
case LangAS::opencl_generic:
return 4; // Not in SPIR 2.0 specs.
case LangAS::opencl_global_device:
return 5;
case LangAS::opencl_global_host:
return 6;
default:
return 0; // Assume private.
}
@ -3792,6 +3800,8 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
if (LangOpts.OpenCL) {
AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
assert(AddrSpace == LangAS::opencl_global ||
AddrSpace == LangAS::opencl_global_device ||
AddrSpace == LangAS::opencl_global_host ||
AddrSpace == LangAS::opencl_constant ||
AddrSpace == LangAS::opencl_local ||
AddrSpace >= LangAS::FirstTargetAddressSpace);

View File

@ -7968,6 +7968,8 @@ static bool isAddressSpaceKind(const ParsedAttr &attr) {
return attrKind == ParsedAttr::AT_AddressSpace ||
attrKind == ParsedAttr::AT_OpenCLPrivateAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGlobalAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGlobalHostAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLLocalAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLConstantAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGenericAddressSpace;
@ -8048,6 +8050,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
break;
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
case ParsedAttr::AT_OpenCLLocalAddressSpace:
case ParsedAttr::AT_OpenCLConstantAddressSpace:
case ParsedAttr::AT_OpenCLGenericAddressSpace:

View File

@ -17,6 +17,18 @@ void langas() {
// CHECK: VarDecl {{.*}} z_global '__global int *'
[[clang::opencl_global]] int *z_global;
// CHECK: VarDecl {{.*}} x_global_device '__global_device int *'
__attribute__((opencl_global_device)) int *x_global_device;
// CHECK: VarDecl {{.*}} z_global_device '__global_device int *'
[[clang::opencl_global_device]] int *z_global_device;
// CHECK: VarDecl {{.*}} x_global_host '__global_host int *'
__attribute__((opencl_global_host)) int *x_global_host;
// CHECK: VarDecl {{.*}} z_global_host '__global_host int *'
[[clang::opencl_global_host]] int *z_global_host;
// CHECK: VarDecl {{.*}} x_local '__local int *'
__attribute__((opencl_local)) int *x_local;

View File

@ -43,6 +43,10 @@ void ocl_f0(char __private *p) { }
struct ocl_OpaqueType;
typedef ocl_OpaqueType __global * ocl_OpaqueTypePtr;
typedef ocl_OpaqueType __attribute__((opencl_global_host)) * ocl_OpaqueTypePtrH;
typedef ocl_OpaqueType
__attribute__((opencl_global_device)) *
ocl_OpaqueTypePtrD;
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f0PU8CLglobal14ocl_OpaqueType
// WINOCL-LABEL: define {{.*}}void @"?ocl_f0@@YAXPEAU?$_ASCLglobal@$$CAUocl_OpaqueType@@@__clang@@@Z"
@ -61,4 +65,12 @@ __constant float *ocl_f1(char __generic const *p) { return 0;}
// CHECKOCL-LABEL: define {{.*}}float* @_Z6ocl_f2PU9CLgenericKc
// WINOCL-LABEL: define {{.*}}float* @"?ocl_f2@@YAPEAU?$_ASCLgeneric@$$CAM@__clang@@QEAU?$_ASCLgeneric@$$CBD@2@@Z"
__generic float *ocl_f2(__generic char const * const p) { return 0;}
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f3PU6CLhost14ocl_OpaqueType
// WINOCL-LABEL: define {{.*}}void @"?ocl_f3@@YAXPEAU?$_ASCLhost@$$CAUocl_OpaqueType@@@__clang@@@Z"
void ocl_f3(ocl_OpaqueTypePtrH) {}
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f4PU8CLdevice14ocl_OpaqueType
// WINOCL-LABEL: define {{.*}}void @"?ocl_f4@@YAXPEAU?$_ASCLdevice@$$CAUocl_OpaqueType@@@__clang@@@Z"
void ocl_f4(ocl_OpaqueTypePtrD) {}
#endif

View File

@ -6,7 +6,9 @@
// pointers to different address spaces
// CHECK: define void @test
void test(global int *arg_glob, generic int *arg_gen) {
void test(global int *arg_glob, generic int *arg_gen,
__attribute__((opencl_global_device)) int *arg_device,
__attribute__((opencl_global_host)) int *arg_host) {
int var_priv;
arg_gen = arg_glob; // implicit cast global -> generic
// CHECK: %{{[0-9]+}} = addrspacecast i32 addrspace(1)* %{{[0-9]+}} to i32 addrspace(4)*
@ -39,6 +41,30 @@ void test(global int *arg_glob, generic int *arg_gen) {
// CHECK-NOT: bitcast
// CHECK-NOFAKE: bitcast
// CHECK-NOFAKE-NOT: addrspacecast
arg_glob = arg_device; // implicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
arg_glob = arg_host; // implicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
arg_glob = (global int *)arg_device; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
arg_glob = (global int *)arg_host; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
arg_device = (__attribute((opencl_global_device)) int *)arg_glob; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
arg_host = (__attribute((opencl_global_host)) int *)arg_glob; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
}
// Test ternary operator.

View File

@ -51,6 +51,14 @@ void fl(local int *arg) {}
// AMDGCN: i32 addrspace(4)* %arg
void fc(constant int *arg) {}
// SPIR: i32 addrspace(5)* %arg
// AMDGCN: i32 addrspace(1)* %arg
void fd(__attribute__((opencl_global_device)) int *arg) {}
// SPIR: i32 addrspace(6)* %arg
// AMDGCN: i32 addrspace(1)* %arg
void fh(__attribute__((opencl_global_host)) int *arg) {}
#ifdef CL20
int i;
// CL20-DAG: @i = {{(dso_local )?}}addrspace(1) global i32 0

View File

@ -0,0 +1,67 @@
// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -cl-std=CL2.0
// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DGENERIC
// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DCONSTANT
// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DLOCAL
/* USM (unified shared memory) extension for OpenCL C 2.0 adds two new address
* spaces: global_device and global_host that are a subset of __global address
* space. As ISO/IEC TR 18037 5.1.3 declares - it's possible to implicitly
* convert a subset address space to a superset address space, while conversion
* in a reversed direction could be achieved only with an explicit cast */
#ifdef GENERIC
#define AS_COMP __generic
#else
#define AS_COMP __global
#endif // GENERIC
#ifdef CONSTANT
#define AS_INCOMP __constant
#elif LOCAL
#define AS_INCOMP __local
#else // PRIVATE
#define AS_INCOMP __private
#endif // CONSTANT
void test(AS_COMP int *arg_comp,
__attribute__((opencl_global_device)) int *arg_device,
__attribute__((opencl_global_host)) int *arg_host) {
AS_COMP int *var_glob1 = arg_device;
AS_COMP int *var_glob2 = arg_host;
AS_COMP int *var_glob3 = (AS_COMP int *)arg_device;
AS_COMP int *var_glob4 = (AS_COMP int *)arg_host;
arg_device = (__attribute__((opencl_global_device)) int *)arg_comp;
arg_host = (__attribute__((opencl_global_host)) int *)arg_comp;
#ifdef GENERIC
// expected-error@+6{{assigning '__generic int *__private' to '__global_device int *__private' changes address space of pointer}}
// expected-error@+6{{assigning '__generic int *__private' to '__global_host int *__private' changes address space of pointer}}
#else
// expected-error@+3{{assigning '__global int *__private' to '__global_device int *__private' changes address space of pointer}}
// expected-error@+3{{assigning '__global int *__private' to '__global_host int *__private' changes address space of pointer}}
#endif // GENERIC
arg_device = arg_comp;
arg_host = arg_comp;
#ifdef CONSTANT
// expected-error@+15{{initializing '__constant int *__private' with an expression of type '__global_device int *__private' changes address space of pointer}}
// expected-error@+15{{initializing '__constant int *__private' with an expression of type '__global_host int *__private' changes address space of pointer}}
// expected-error@+15{{initializing '__constant int *__private' with an expression of type '__global_device int *' changes address space of pointer}}
// expected-error@+16{{initializing '__constant int *__private' with an expression of type '__global_host int *' changes address space of pointer}}
#elif LOCAL
// expected-error@+10{{initializing '__local int *__private' with an expression of type '__global_device int *__private' changes address space of pointer}}
// expected-error@+10{{initializing '__local int *__private' with an expression of type '__global_host int *__private' changes address space of pointer}}
// expected-error@+10{{initializing '__local int *__private' with an expression of type '__global_device int *' changes address space of pointer}}
// expected-error@+11{{initializing '__local int *__private' with an expression of type '__global_host int *' changes address space of pointer}}
#else // PRIVATE
// expected-error@+5{{initializing '__private int *__private' with an expression of type '__global_device int *__private' changes address space of pointer}}
// expected-error@+5{{initializing '__private int *__private' with an expression of type '__global_host int *__private' changes address space of pointer}}
// expected-error@+5{{initializing '__private int *__private' with an expression of type '__global_device int *' changes address space of pointer}}
// expected-error@+6{{initializing '__private int *__private' with an expression of type '__global_host int *' changes address space of pointer}}
#endif // CONSTANT
AS_INCOMP int *var_incomp1 = arg_device;
AS_INCOMP int *var_incomp2 = arg_host;
AS_INCOMP int *var_incomp3 =
(__attribute__((opencl_global_device)) int *)arg_device;
AS_INCOMP int *var_incomp4 =
(__attribute__((opencl_global_host)) int *)arg_host;
}

View File

@ -43,7 +43,7 @@ void neg() {
template <long int I>
void tooBig() {
__attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388595)}}
__attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388593)}}
}
template <long int I>
@ -101,7 +101,7 @@ int main() {
car<1, 2, 3>(); // expected-note {{in instantiation of function template specialization 'car<1, 2, 3>' requested here}}
HasASTemplateFields<1> HASTF;
neg<-1>(); // expected-note {{in instantiation of function template specialization 'neg<-1>' requested here}}
correct<0x7FFFF3>();
correct<0x7FFFF1>();
tooBig<8388650>(); // expected-note {{in instantiation of function template specialization 'tooBig<8388650>' requested here}}
__attribute__((address_space(1))) char *x;