forked from OSchip/llvm-project
[libomptarget] Add allocator support for target memory
This patch adds the infrastructure for allocator support for target memory. Three allocators are introduced for device, host and shared memory. The corresponding API functions have the llvm_ prefix temporarily, until they become part of the OpenMP standard. Differential Revision: https://reviews.llvm.org/D97883
This commit is contained in:
parent
2902bdeea1
commit
2468fdd9af
|
@ -86,6 +86,13 @@ enum OpenMPOffloadingRequiresDirFlags {
|
|||
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
|
||||
};
|
||||
|
||||
/// Kinds of target memory allocation that can be passed as the `Kind`
/// argument of a plugin's __tgt_rtl_data_alloc entry point.
enum TargetAllocTy : int32_t {
  TARGET_ALLOC_DEVICE = 0,  ///< Passed by llvm_omp_target_alloc_device.
  TARGET_ALLOC_HOST = 1,    ///< Passed by llvm_omp_target_alloc_host.
  TARGET_ALLOC_SHARED = 2,  ///< Passed by llvm_omp_target_alloc_shared.
  TARGET_ALLOC_DEFAULT = 3  ///< Passed by omp_target_alloc.
};
|
||||
|
||||
/// This struct is a record of an entry point or global. For a function
|
||||
/// entry point the size is expected to be zero
|
||||
struct __tgt_offload_entry {
|
||||
|
@ -190,6 +197,12 @@ int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size,
|
|||
size_t device_offset, int device_num);
|
||||
int omp_target_disassociate_ptr(void *host_ptr, int device_num);
|
||||
|
||||
/// Explicit target memory allocators
|
||||
/// Using the llvm_ prefix until they become part of the OpenMP standard.
|
||||
void *llvm_omp_target_alloc_device(size_t size, int device_num);
|
||||
void *llvm_omp_target_alloc_host(size_t size, int device_num);
|
||||
void *llvm_omp_target_alloc_shared(size_t size, int device_num);
|
||||
|
||||
/// Add the clauses of the requires directives in a given file.
|
||||
void __tgt_register_requires(int64_t flags);
|
||||
|
||||
|
|
|
@ -65,8 +65,10 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
|
|||
// initialize the target data mapping structures. These addresses are
|
||||
// used to generate a table of target variables to pass to
|
||||
// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
|
||||
// case an error occurred on the target device.
|
||||
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
|
||||
// case an error occurred on the target device. Kind dictates what allocator
|
||||
// to use (e.g. shared, host, device).
|
||||
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
|
||||
int32_t Kind);
|
||||
|
||||
// Pass the data content to the target device using the target address. In case
|
||||
// of success, return zero. Otherwise, return an error code.
|
||||
|
|
|
@ -1488,9 +1488,16 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
|||
return DeviceInfo.getOffloadEntriesTable(device_id);
|
||||
}
|
||||
|
||||
void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *) {
|
||||
void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
|
||||
void *ptr = NULL;
|
||||
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
|
||||
if (kind != TARGET_ALLOC_DEFAULT) {
|
||||
REPORT("Invalid target data allocation kind or requested allocator not "
|
||||
"implemented yet\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
atmi_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id));
|
||||
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
|
||||
(long long unsigned)(Elf64_Addr)ptr);
|
||||
|
|
|
@ -1095,9 +1095,16 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
|
|||
return DeviceRTL.loadBinary(device_id, image);
|
||||
}
|
||||
|
||||
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *) {
|
||||
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *,
|
||||
int32_t kind) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
|
||||
if (kind != TARGET_ALLOC_DEFAULT) {
|
||||
REPORT("Invalid target data allocation kind or requested allocator not "
|
||||
"implemented yet\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return DeviceRTL.dataAlloc(device_id, size);
|
||||
}
|
||||
|
||||
|
|
|
@ -250,8 +250,23 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
|
|||
return DeviceInfo.getOffloadEntriesTable(device_id);
|
||||
}
|
||||
|
||||
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
|
||||
void *ptr = malloc(size);
|
||||
// Sample implementation of explicit memory allocator. For this plugin all kinds
|
||||
// are equivalent to each other.
|
||||
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr,
|
||||
int32_t kind) {
|
||||
void *ptr = NULL;
|
||||
|
||||
switch (kind) {
|
||||
case TARGET_ALLOC_DEVICE:
|
||||
case TARGET_ALLOC_HOST:
|
||||
case TARGET_ALLOC_SHARED:
|
||||
case TARGET_ALLOC_DEFAULT:
|
||||
ptr = malloc(size);
|
||||
break;
|
||||
default:
|
||||
REPORT("Invalid target data allocation kind");
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -84,7 +84,14 @@ int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) {
|
|||
return Manager->isDataExchangeable(SrcDevId, DstDevId);
|
||||
}
|
||||
|
||||
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr) {
|
||||
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
|
||||
int32_t kind) {
|
||||
if (kind != TARGET_ALLOC_DEFAULT) {
|
||||
REPORT("Invalid target data allocation kind or requested allocator not "
|
||||
"implemented yet\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return Manager->dataAlloc(DeviceId, Size, HstPtr);
|
||||
}
|
||||
|
||||
|
|
|
@ -330,10 +330,17 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
|
|||
// used to generate a table of target variables to pass to
|
||||
// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
|
||||
// case an error occurred on the target device.
|
||||
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr) {
|
||||
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
|
||||
int32_t kind) {
|
||||
int ret;
|
||||
uint64_t addr;
|
||||
|
||||
if (kind != TARGET_ALLOC_DEFAULT) {
|
||||
REPORT("Invalid target data allocation kind or requested allocator not "
|
||||
"implemented yet\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (DeviceInfo.ProcHandles[ID] == NULL) {
|
||||
struct veo_proc_handle *proc_handle;
|
||||
proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]);
|
||||
|
|
|
@ -38,31 +38,19 @@ EXTERN int omp_get_initial_device(void) {
|
|||
}
|
||||
|
||||
/// Allocate \p size bytes for device \p device_num. The request is forwarded
/// to targetAllocExplicit with TARGET_ALLOC_DEFAULT as the allocation kind and
/// this function's name (__func__) as the name argument.
EXTERN void *omp_target_alloc(size_t size, int device_num) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_alloc for device %d requesting %zu bytes\n",
|
||||
device_num, size);
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEFAULT, __func__);
|
||||
}
|
||||
|
||||
if (size <= 0) {
|
||||
DP("Call to omp_target_alloc with non-positive length\n");
|
||||
return NULL;
|
||||
}
|
||||
/// Allocate \p size bytes for device \p device_num with the
/// TARGET_ALLOC_DEVICE allocation kind. Forwards to targetAllocExplicit,
/// passing this function's name (__func__) as the name argument.
EXTERN void *llvm_omp_target_alloc_device(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEVICE, __func__);
|
||||
}
|
||||
|
||||
void *rc = NULL;
|
||||
/// Allocate \p size bytes for device \p device_num with the
/// TARGET_ALLOC_HOST allocation kind. Forwards to targetAllocExplicit,
/// passing this function's name (__func__) as the name argument.
EXTERN void *llvm_omp_target_alloc_host(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_HOST, __func__);
|
||||
}
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
rc = malloc(size);
|
||||
DP("omp_target_alloc returns host ptr " DPxMOD "\n", DPxPTR(rc));
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!device_is_ready(device_num)) {
|
||||
DP("omp_target_alloc returns NULL ptr\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = PM->Devices[device_num].allocData(size);
|
||||
DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc));
|
||||
return rc;
|
||||
/// Allocate \p size bytes for device \p device_num with the
/// TARGET_ALLOC_SHARED allocation kind. Forwards to targetAllocExplicit,
/// passing this function's name (__func__) as the name argument.
EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__);
|
||||
}
|
||||
|
||||
EXTERN void omp_target_free(void *device_ptr, int device_num) {
|
||||
|
|
|
@ -405,8 +405,8 @@ __tgt_target_table *DeviceTy::load_binary(void *Img) {
|
|||
return rc;
|
||||
}
|
||||
|
||||
void *DeviceTy::allocData(int64_t Size, void *HstPtr) {
|
||||
return RTL->data_alloc(RTLDeviceID, Size, HstPtr);
|
||||
// Forward the allocation request to the RTL's data_alloc entry point for this
// device's RTLDeviceID, passing \p Size, \p HstPtr and \p Kind through
// unchanged, and return whatever pointer that entry point returns.
void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
|
||||
return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
|
||||
}
|
||||
|
||||
int32_t DeviceTy::deleteData(void *TgtPtrBegin) {
|
||||
|
|
|
@ -185,13 +185,16 @@ struct DeviceTy {
|
|||
__tgt_target_table *load_binary(void *Img);
|
||||
|
||||
// device memory allocation/deallocation routines
|
||||
/// Allocates \p Size bytes on the device and returns the address/nullptr when
|
||||
/// Allocates \p Size bytes on the device, host or shared memory space
|
||||
/// (depending on \p Kind) and returns the address/nullptr when
|
||||
/// succeeds/fails. \p HstPtr is an address of the host data which the
|
||||
/// allocated target data will be associated with. If it is unknown, the
|
||||
/// default value of \p HstPtr is nullptr. Note: this function doesn't do
|
||||
/// pointer association. Actually, all the __tgt_rtl_data_alloc
|
||||
/// implementations ignore \p HstPtr.
|
||||
void *allocData(int64_t Size, void *HstPtr = nullptr);
|
||||
/// implementations ignore \p HstPtr. \p Kind dictates what allocator should
|
||||
/// be used (host, shared, device).
|
||||
void *allocData(int64_t Size, void *HstPtr = nullptr,
|
||||
int32_t Kind = TARGET_ALLOC_DEFAULT);
|
||||
/// Deallocates memory which \p TgtPtrBegin points at and returns
|
||||
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
|
||||
int32_t deleteData(void *TgtPtrBegin);
|
||||
|
|
|
@ -36,6 +36,9 @@ VERS1.0 {
|
|||
omp_target_memcpy_rect;
|
||||
omp_target_associate_ptr;
|
||||
omp_target_disassociate_ptr;
|
||||
llvm_omp_target_alloc_host;
|
||||
llvm_omp_target_alloc_shared;
|
||||
llvm_omp_target_alloc_device;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
|
|
@ -328,6 +328,35 @@ static int32_t getParentIndex(int64_t type) {
|
|||
return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
|
||||
}
|
||||
|
||||
void *targetAllocExplicit(size_t size, int device_num, int kind,
|
||||
const char *name) {
|
||||
TIMESCOPE();
|
||||
DP("Call to %s for device %d requesting %zu bytes\n", name, device_num, size);
|
||||
|
||||
if (size <= 0) {
|
||||
DP("Call to %s with non-positive length\n", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *rc = NULL;
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
rc = malloc(size);
|
||||
DP("%s returns host ptr " DPxMOD "\n", name, DPxPTR(rc));
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!device_is_ready(device_num)) {
|
||||
DP("%s returns NULL ptr\n", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DeviceTy &Device = PM->Devices[device_num];
|
||||
rc = Device.allocData(size, nullptr, kind);
|
||||
DP("%s returns device ptr " DPxMOD "\n", name, DPxPTR(rc));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/// Call the user-defined mapper function followed by the appropriate
|
||||
/// targetData* function (targetData{Begin,End,Update}).
|
||||
int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
|
||||
|
|
|
@ -46,6 +46,8 @@ extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
|||
|
||||
extern void handleTargetOutcome(bool Success, ident_t *Loc);
|
||||
extern int checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);
|
||||
extern void *targetAllocExplicit(size_t size, int device_num, int kind,
|
||||
const char *name);
|
||||
|
||||
// This structure stores information of a mapped memory region.
|
||||
struct MapComponentInfoTy {
|
||||
|
|
|
@ -30,7 +30,7 @@ struct RTLInfoTy {
|
|||
typedef int32_t(number_of_devices_ty)();
|
||||
typedef int32_t(init_device_ty)(int32_t);
|
||||
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
|
||||
typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
|
||||
typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t);
|
||||
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
|
||||
typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
|
||||
__tgt_async_info *);
|
||||
|
|
Loading…
Reference in New Issue