forked from OSchip/llvm-project
[OpenMP][libomptarget] Enable requires flags for target libraries.
Summary: Target link variables are currently implemented by creating a copy of the variables on the device side and unified memory never gets exploited. When the prgram uses the: ``` #pragma omp requires unified_shared_memory ``` directive in conjunction with a declare target link, the linked variable is no longer allocated on the device and the host version is used instead. This behavior is overridden by performing an explicit mapping. A Clang side patch is required. Reviewers: ABataev, AlexEichenberger, grokos, Hahnfeld Reviewed By: AlexEichenberger, grokos, Hahnfeld Subscribers: Hahnfeld, jfb, guansong, jdoerfert, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D60223 llvm-svn: 361294
This commit is contained in:
parent
44d17ca02e
commit
9e9c918259
|
@ -60,6 +60,21 @@ enum OpenMPOffloadingDeclareTargetFlags {
|
|||
OMP_DECLARE_TARGET_DTOR = 0x04
|
||||
};
|
||||
|
||||
enum OpenMPOffloadingRequiresDirFlags {
|
||||
/// flag undefined.
|
||||
OMP_REQ_UNDEFINED = 0x000,
|
||||
/// no requires directive present.
|
||||
OMP_REQ_NONE = 0x001,
|
||||
/// reverse_offload clause.
|
||||
OMP_REQ_REVERSE_OFFLOAD = 0x002,
|
||||
/// unified_address clause.
|
||||
OMP_REQ_UNIFIED_ADDRESS = 0x004,
|
||||
/// unified_shared_memory clause.
|
||||
OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
|
||||
/// dynamic_allocators clause.
|
||||
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
|
||||
};
|
||||
|
||||
/// This struct is a record of an entry point or global. For a function
|
||||
/// entry point the size is expected to be zero
|
||||
struct __tgt_offload_entry {
|
||||
|
@ -113,6 +128,9 @@ int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size,
|
|||
size_t device_offset, int device_num);
|
||||
int omp_target_disassociate_ptr(void *host_ptr, int device_num);
|
||||
|
||||
/// add the clauses of the requires directives in a given file
|
||||
void __tgt_register_requires(int64_t flags);
|
||||
|
||||
/// adds a target shared library to the target execution image
|
||||
void __tgt_register_lib(__tgt_bin_desc *desc);
|
||||
|
||||
|
|
|
@ -152,7 +152,7 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
|
|||
|
||||
// Used by target_data_begin
|
||||
// Return the target pointer begin (where the data will be moved).
|
||||
// Allocate memory if this is the first occurrence if this mapping.
|
||||
// Allocate memory if this is the first occurrence of this mapping.
|
||||
// Increment the reference counter.
|
||||
// If NULL is returned, then either data allocation failed or the user tried
|
||||
// to do an illegal mapping.
|
||||
|
|
|
@ -98,11 +98,13 @@ struct DeviceTy {
|
|||
|
||||
uint64_t loopTripCnt;
|
||||
|
||||
int64_t RTLRequiresFlags;
|
||||
|
||||
DeviceTy(RTLInfoTy *RTL)
|
||||
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
|
||||
HasPendingGlobals(false), HostDataToTargetMap(),
|
||||
PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(),
|
||||
ShadowMtx(), loopTripCnt(0) {}
|
||||
ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {}
|
||||
|
||||
// The existence of mutexes makes DeviceTy non-copyable. We need to
|
||||
// provide a copy constructor and an assignment operator explicitly.
|
||||
|
@ -112,7 +114,8 @@ struct DeviceTy {
|
|||
HostDataToTargetMap(d.HostDataToTargetMap),
|
||||
PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap),
|
||||
DataMapMtx(), PendingGlobalsMtx(),
|
||||
ShadowMtx(), loopTripCnt(d.loopTripCnt) {}
|
||||
ShadowMtx(), loopTripCnt(d.loopTripCnt),
|
||||
RTLRequiresFlags(d.RTLRequiresFlags) {}
|
||||
|
||||
DeviceTy& operator=(const DeviceTy &d) {
|
||||
DeviceID = d.DeviceID;
|
||||
|
@ -124,6 +127,7 @@ struct DeviceTy {
|
|||
PendingCtorsDtors = d.PendingCtorsDtors;
|
||||
ShadowPtrMap = d.ShadowPtrMap;
|
||||
loopTripCnt = d.loopTripCnt;
|
||||
RTLRequiresFlags = d.RTLRequiresFlags;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
VERS1.0 {
|
||||
global:
|
||||
__tgt_register_requires;
|
||||
__tgt_register_lib;
|
||||
__tgt_unregister_lib;
|
||||
__tgt_target_data_begin;
|
||||
|
|
|
@ -57,7 +57,7 @@ static void HandleTargetOutcome(bool success) {
|
|||
}
|
||||
break;
|
||||
case tgt_default:
|
||||
FATAL_MESSAGE0(1, "default offloading policy must switched to "
|
||||
FATAL_MESSAGE0(1, "default offloading policy must switched to "
|
||||
"mandatory or disabled");
|
||||
break;
|
||||
case tgt_mandatory:
|
||||
|
@ -68,6 +68,12 @@ static void HandleTargetOutcome(bool success) {
|
|||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds requires flags
|
||||
EXTERN void __tgt_register_requires(int64_t flags) {
|
||||
RTLs.RegisterRequires(flags);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds a target shared library to the target execution image
|
||||
EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
|
||||
|
|
|
@ -186,6 +186,46 @@ static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc,
|
|||
}
|
||||
}
|
||||
|
||||
void RTLsTy::RegisterRequires(int64_t flags) {
|
||||
// TODO: add more elaborate check.
|
||||
// Minimal check: only set requires flags if previous value
|
||||
// is undefined. This ensures that only the first call to this
|
||||
// function will set the requires flags. All subsequent calls
|
||||
// will be checked for compatibility.
|
||||
assert(flags != OMP_REQ_UNDEFINED &&
|
||||
"illegal undefined flag for requires directive!");
|
||||
if (RequiresFlags == OMP_REQ_UNDEFINED) {
|
||||
RequiresFlags = flags;
|
||||
return;
|
||||
}
|
||||
|
||||
// If multiple compilation units are present enforce
|
||||
// consistency across all of them for require clauses:
|
||||
// - reverse_offload
|
||||
// - unified_address
|
||||
// - unified_shared_memory
|
||||
if ((RequiresFlags & OMP_REQ_REVERSE_OFFLOAD) !=
|
||||
(flags & OMP_REQ_REVERSE_OFFLOAD)) {
|
||||
FATAL_MESSAGE0(1,
|
||||
"'#pragma omp requires reverse_offload' not used consistently!");
|
||||
}
|
||||
if ((RequiresFlags & OMP_REQ_UNIFIED_ADDRESS) !=
|
||||
(flags & OMP_REQ_UNIFIED_ADDRESS)) {
|
||||
FATAL_MESSAGE0(1,
|
||||
"'#pragma omp requires unified_address' not used consistently!");
|
||||
}
|
||||
if ((RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) !=
|
||||
(flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
FATAL_MESSAGE0(1,
|
||||
"'#pragma omp requires unified_shared_memory' not used consistently!");
|
||||
}
|
||||
|
||||
// TODO: insert any other missing checks
|
||||
|
||||
DP("New requires flags %ld compatible with existing %ld!\n",
|
||||
flags, RequiresFlags);
|
||||
}
|
||||
|
||||
void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
||||
// Attempt to load all plugins available in the system.
|
||||
std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
|
||||
|
@ -222,6 +262,8 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
Devices[start + device_id].DeviceID = start + device_id;
|
||||
// RTL local device ID
|
||||
Devices[start + device_id].RTLDeviceID = device_id;
|
||||
// RTL requires flags
|
||||
Devices[start + device_id].RTLRequiresFlags = RequiresFlags;
|
||||
}
|
||||
|
||||
// Initialize the index of this RTL and save it in the used RTLs.
|
||||
|
|
|
@ -118,8 +118,13 @@ public:
|
|||
// binaries.
|
||||
std::vector<RTLInfoTy *> UsedRTLs;
|
||||
|
||||
int64_t RequiresFlags;
|
||||
|
||||
explicit RTLsTy() {}
|
||||
|
||||
// Register the clauses of the requires directive.
|
||||
void RegisterRequires(int64_t flags);
|
||||
|
||||
// Register a shared library with all (compatible) RTLs.
|
||||
void RegisterLib(__tgt_bin_desc *desc);
|
||||
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
|
||||
/*
|
||||
Test for the 'requires' clause check.
|
||||
When a target region is used, the requires flags are set in the
|
||||
runtime for the entire compilation unit. If the flags are set again,
|
||||
(for whatever reason) the set must be consistent with previously
|
||||
set values.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Various definitions copied from OpenMP RTL
|
||||
|
||||
extern void __tgt_register_requires(int64_t);
|
||||
|
||||
// End of definitions copied from OpenMP RTL.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
void run_reg_requires() {
|
||||
// Before the target region is registered, the requires registers the status
|
||||
// of the requires clauses. Since there are no requires clauses in this file
|
||||
// the flags state can only be OMP_REQ_NONE i.e. 1.
|
||||
|
||||
// This is the 2nd time this function is called so it should print the debug
|
||||
// info belonging to the check.
|
||||
__tgt_register_requires(1);
|
||||
__tgt_register_requires(1);
|
||||
// DEBUG: New requires flags 1 compatible with existing 1!
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
int main() {
|
||||
run_reg_requires();
|
||||
|
||||
// This also runs reg requires for the first time.
|
||||
#pragma omp target
|
||||
{}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue