2017-01-26 05:27:24 +08:00
|
|
|
//===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===//
|
|
|
|
//
|
2019-01-19 18:56:40 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-01-26 05:27:24 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Interface to be used by Clang during the codegen of a
|
|
|
|
// target region.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef _OMPTARGET_H_
|
|
|
|
#define _OMPTARGET_H_
|
|
|
|
|
2021-02-15 02:25:56 +08:00
|
|
|
#include <deque>
|
2017-05-10 22:12:36 +08:00
|
|
|
#include <stddef.h>
|
2021-02-11 01:45:39 +08:00
|
|
|
#include <stdint.h>
|
2017-01-26 05:27:24 +08:00
|
|
|
|
2020-11-19 04:38:25 +08:00
|
|
|
#include <SourceInfo.h>
|
|
|
|
|
2017-01-26 05:27:24 +08:00
|
|
|
/// Status codes used by the offloading runtime; AsyncInfoTy::synchronize() in
/// this header is documented to return one of these two values.
#define OFFLOAD_SUCCESS (0)
#define OFFLOAD_FAIL (~0)

/// Sentinel device id. Going by its name it requests the default device; the
/// code that interprets it is outside this header. Parenthesized like the
/// other constants so the negative literal always expands as a single operand.
#define OFFLOAD_DEVICE_DEFAULT (-1)
|
|
|
|
|
|
|
|
// Don't format out enums and structs.
|
|
|
|
// clang-format off
|
2017-01-26 05:27:24 +08:00
|
|
|
|
2021-09-06 13:55:30 +08:00
|
|
|
/// Return flags of the __tgt_target_XXX public APIs.
enum __tgt_target_return_t : int {
  /// Successful offload executed on a target device.
  OMP_TGT_SUCCESS = 0,
  /// Offload may not execute on the requested target device. This scenario
  /// can be caused by the device being unavailable or unsupported, as
  /// described in the Execution Model in the specification. This status may
  /// not be used for target device execution failure, which should be handled
  /// internally in libomptarget.
  OMP_TGT_FAIL = ~0
};
|
|
|
|
|
2017-01-26 05:27:24 +08:00
|
|
|
/// Data attributes for each data reference used in an OpenMP target region.
///
/// Every enumerator except OMP_TGT_MAPTYPE_MEMBER_OF is a single-bit flag;
/// OMP_TGT_MAPTYPE_MEMBER_OF is a mask covering the 16 most significant bits.
/// The largest values do not fit in 32 bits, so the enumeration needs a
/// 64-bit underlying type.
enum tgt_map_type {
  // No flags
  OMP_TGT_MAPTYPE_NONE = 0x000,
  // copy data from host to device
  OMP_TGT_MAPTYPE_TO = 0x001,
  // copy data from device to host
  OMP_TGT_MAPTYPE_FROM = 0x002,
  // copy regardless of the reference count
  OMP_TGT_MAPTYPE_ALWAYS = 0x004,
  // force unmapping of data
  OMP_TGT_MAPTYPE_DELETE = 0x008,
  // map the pointer as well as the pointee
  OMP_TGT_MAPTYPE_PTR_AND_OBJ = 0x010,
  // pass device base address to kernel
  OMP_TGT_MAPTYPE_TARGET_PARAM = 0x020,
  // return base device address of mapped data
  OMP_TGT_MAPTYPE_RETURN_PARAM = 0x040,
  // private variable - not mapped
  OMP_TGT_MAPTYPE_PRIVATE = 0x080,
  // copy by value - not mapped
  OMP_TGT_MAPTYPE_LITERAL = 0x100,
  // mapping is implicit
  OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
  // 'close' map-type modifier: hint to allocate the data close to the target
  // device. (This comment used to read "copy data to device", which describes
  // OMP_TGT_MAPTYPE_TO instead.)
  OMP_TGT_MAPTYPE_CLOSE = 0x400,
  // runtime error if not already allocated
  OMP_TGT_MAPTYPE_PRESENT = 0x1000,
  // use a separate reference counter so that the data cannot be unmapped
  // within the structured region
  // This is an OpenMP extension for the sake of OpenACC support.
  OMP_TGT_MAPTYPE_OMPX_HOLD = 0x2000,
  // descriptor for non-contiguous target-update
  OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
  // member of struct, member given by [16 MSBs] - 1
  OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
};
|
|
|
|
|
|
|
|
/// Bit flags that mark an offload entry as a 'link' variable, a global
/// constructor or a global destructor. The `flags` field of
/// __tgt_offload_entry mentions 'link' as an example of an entry flag.
enum OpenMPOffloadingDeclareTargetFlags {
  /// Mark the entry as having a 'link' attribute.
  OMP_DECLARE_TARGET_LINK = 0x01,
  /// Mark the entry as being a global constructor.
  OMP_DECLARE_TARGET_CTOR = 0x02,
  /// Mark the entry as being a global destructor.
  OMP_DECLARE_TARGET_DTOR = 0x04
};
|
|
|
|
|
[OpenMP][libomptarget] Enable requires flags for target libraries.
Summary:
Target link variables are currently implemented by creating a copy of the variables on the device side and unified memory never gets exploited.
When the program uses the:
```
#pragma omp requires unified_shared_memory
```
directive in conjunction with a declare target link, the linked variable is no longer allocated on the device and the host version is used instead.
This behavior is overridden by performing an explicit mapping.
A Clang side patch is required.
Reviewers: ABataev, AlexEichenberger, grokos, Hahnfeld
Reviewed By: AlexEichenberger, grokos, Hahnfeld
Subscribers: Hahnfeld, jfb, guansong, jdoerfert, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D60223
llvm-svn: 361294
2019-05-22 03:35:02 +08:00
|
|
|
/// Bit flags describing the clauses of OpenMP 'requires' directives.
/// NOTE(review): presumably these are the values passed to
/// __tgt_register_requires() - confirm against the caller.
enum OpenMPOffloadingRequiresDirFlags {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires directive present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
};
|
|
|
|
|
2021-03-04 03:48:32 +08:00
|
|
|
/// Kinds of target memory allocation. The underlying type is fixed to
/// int32_t; the values are spelled out so that reordering the enumerators
/// cannot silently change them.
enum TargetAllocTy : int32_t {
  TARGET_ALLOC_DEVICE = 0,
  TARGET_ALLOC_HOST = 1,
  TARGET_ALLOC_SHARED = 2,
  TARGET_ALLOC_DEFAULT = 3
};
|
|
|
|
|
2017-01-26 05:27:24 +08:00
|
|
|
/// This struct is a record of an entry point or global. For a function
/// entry point the size is expected to be zero.
struct __tgt_offload_entry {
  void *addr;       // Pointer to the offload entry info (function or global)
  char *name;       // Name of the function or global
  size_t size;      // Size of the entry info (0 if it is a function)
  int32_t flags;    // Flags associated with the entry, e.g. 'link'.
  int32_t reserved; // Reserved, to be used by the runtime library.
};
|
|
|
|
|
|
|
|
/// This struct is a record of the device image information
|
|
|
|
struct __tgt_device_image {
|
|
|
|
void *ImageStart; // Pointer to the target code start
|
|
|
|
void *ImageEnd; // Pointer to the target code end
|
|
|
|
__tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
|
|
|
|
__tgt_offload_entry *EntriesEnd; // End of table (non inclusive)
|
|
|
|
};
|
|
|
|
|
|
|
|
/// This struct is a record of all the host code that may be offloaded to a
|
|
|
|
/// target.
|
|
|
|
struct __tgt_bin_desc {
|
|
|
|
int32_t NumDeviceImages; // Number of device types supported
|
|
|
|
__tgt_device_image *DeviceImages; // Array of device images (1 per dev. type)
|
|
|
|
__tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
|
|
|
|
__tgt_offload_entry *HostEntriesEnd; // End of table (non inclusive)
|
|
|
|
};
|
|
|
|
|
|
|
|
/// This struct contains the offload entries identified by the target runtime
|
|
|
|
struct __tgt_target_table {
|
|
|
|
__tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries
|
|
|
|
__tgt_offload_entry
|
|
|
|
*EntriesEnd; // End of the table with all the entries (non inclusive)
|
|
|
|
};
|
|
|
|
|
2021-02-11 01:06:00 +08:00
|
|
|
// clang-format on
|
|
|
|
|
[OpenMP] Optimized stream selection by scheduling data mapping for the same target region into a same stream
Summary:
This patch introduces two things for offloading:
1. Asynchronous data transferring: those functions are suffix with `_async`. They have one more argument compared with their synchronous counterparts: `__tgt_async_info*`, which is a new struct that only has one field, `void *Identifier`. This struct is for information exchange between different asynchronous operations. It can be used for stream selection, like in this case, or operation synchronization, which is also used. We may expect more usages in the future.
2. Optimization of stream selection for data mapping. Previous implementation was using asynchronous device memory transfer but synchronizing after each memory transfer. Actually, if we say kernel A needs four memory copy to device and two memory copy back to host, then we can schedule these seven operations (four H2D, two D2H, and one kernel launch) into a same stream and just need synchronization after memory copy from device to host. In this way, we can save a huge overhead compared with synchronization after each operation.
Reviewers: jdoerfert, ye-luo
Reviewed By: jdoerfert
Subscribers: yaxunl, lildmh, guansong, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D77005
2020-04-08 02:51:56 +08:00
|
|
|
/// This struct contains information exchanged between different asynchronous
/// operations for device-dependent optimization and potential synchronization.
struct __tgt_async_info {
  // A pointer to a queue-like structure where offloading operations are
  // issued. The same structure is assumed to be used for synchronization. In
  // the CUDA backend it is a CUstream. Defaults to null.
  void *Queue = nullptr;
};
|
|
|
|
|
2021-02-11 01:06:00 +08:00
|
|
|
struct DeviceTy;
|
|
|
|
|
|
|
|
/// The libomptarget wrapper around a __tgt_async_info object directly
|
|
|
|
/// associated with a libomptarget layer device. RAII semantics to avoid
|
|
|
|
/// mistakes.
|
|
|
|
class AsyncInfoTy {
|
2021-02-15 02:25:56 +08:00
|
|
|
/// Locations we used in (potentially) asynchronous calls which should live
|
|
|
|
/// as long as this AsyncInfoTy object.
|
|
|
|
std::deque<void *> BufferLocations;
|
|
|
|
|
2021-02-11 01:06:00 +08:00
|
|
|
__tgt_async_info AsyncInfo;
|
|
|
|
DeviceTy &Device;
|
|
|
|
|
|
|
|
public:
|
|
|
|
AsyncInfoTy(DeviceTy &Device) : Device(Device) {}
|
|
|
|
~AsyncInfoTy() { synchronize(); }
|
|
|
|
|
|
|
|
/// Implicit conversion to the __tgt_async_info which is used in the
|
|
|
|
/// plugin interface.
|
|
|
|
operator __tgt_async_info *() { return &AsyncInfo; }
|
|
|
|
|
|
|
|
/// Synchronize all pending actions.
|
|
|
|
///
|
|
|
|
/// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately.
|
|
|
|
int synchronize();
|
2021-02-15 02:25:56 +08:00
|
|
|
|
|
|
|
/// Return a void* reference with a lifetime that is at least as long as this
|
|
|
|
/// AsyncInfoTy object. The location can be used as intermediate buffer.
|
|
|
|
void *&getVoidPtrLocation();
|
2021-02-11 01:06:00 +08:00
|
|
|
};
|
|
|
|
|
2020-11-20 01:16:09 +08:00
|
|
|
/// This struct is a record of non-contiguous information.
/// NOTE(review): presumably one record describes one dimension of a
/// non-contiguous transfer (see OMP_TGT_MAPTYPE_NON_CONTIG); the units of the
/// fields are not visible in this header - confirm against the users.
struct __tgt_target_non_contig {
  uint64_t Offset;
  uint64_t Count;
  uint64_t Stride;
};
|
|
|
|
|
2017-01-26 05:27:24 +08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/// OpenMP device and device-memory routines. Only the prototypes are given
/// here; their semantics are defined by the OpenMP API specification.
int omp_get_num_devices(void);
int omp_get_initial_device(void);
void *omp_target_alloc(size_t size, int device_num);
void omp_target_free(void *device_ptr, int device_num);
int omp_target_is_present(const void *ptr, int device_num);
int omp_target_memcpy(void *dst, const void *src, size_t length,
                      size_t dst_offset, size_t src_offset, int dst_device,
                      int src_device);
int omp_target_memcpy_rect(void *dst, const void *src, size_t element_size,
                           int num_dims, const size_t *volume,
                           const size_t *dst_offsets, const size_t *src_offsets,
                           const size_t *dst_dimensions,
                           const size_t *src_dimensions, int dst_device,
                           int src_device);
int omp_target_associate_ptr(const void *host_ptr, const void *device_ptr,
                             size_t size, size_t device_offset, int device_num);
int omp_target_disassociate_ptr(const void *host_ptr, int device_num);
|
2017-01-26 05:27:24 +08:00
|
|
|
|
2021-03-04 03:48:32 +08:00
|
|
|
/// Explicit target memory allocators.
/// Using the llvm_ prefix until they become part of the OpenMP standard.
void *llvm_omp_target_alloc_device(size_t size, int device_num);
void *llvm_omp_target_alloc_host(size_t size, int device_num);
void *llvm_omp_target_alloc_shared(size_t size, int device_num);
|
|
|
|
|
2021-10-02 02:37:02 +08:00
|
|
|
/// Dummy target so we have a symbol for generating host fallback.
/// Declared with an explicit (void) parameter list, like the other
/// no-argument routines in this header.
void *llvm_omp_get_dynamic_shared(void);
|
|
|
|
|
[OpenMP][libomptarget] Enable requires flags for target libraries.
Summary:
Target link variables are currently implemented by creating a copy of the variables on the device side and unified memory never gets exploited.
When the program uses the:
```
#pragma omp requires unified_shared_memory
```
directive in conjunction with a declare target link, the linked variable is no longer allocated on the device and the host version is used instead.
This behavior is overridden by performing an explicit mapping.
A Clang side patch is required.
Reviewers: ABataev, AlexEichenberger, grokos, Hahnfeld
Reviewed By: AlexEichenberger, grokos, Hahnfeld
Subscribers: Hahnfeld, jfb, guansong, jdoerfert, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D60223
llvm-svn: 361294
2019-05-22 03:35:02 +08:00
|
|
|
/// add the clauses of the requires directives in a given file
|
|
|
|
void __tgt_register_requires(int64_t flags);
|
|
|
|
|
2017-01-26 05:27:24 +08:00
|
|
|
/// adds a target shared library to the target execution image
|
|
|
|
void __tgt_register_lib(__tgt_bin_desc *desc);
|
|
|
|
|
2021-07-28 10:38:27 +08:00
|
|
|
/// Initialize all RTLs at once
|
|
|
|
void __tgt_init_all_rtls();
|
|
|
|
|
2017-01-26 05:27:24 +08:00
|
|
|
/// removes a target shared library from the target execution image
|
|
|
|
void __tgt_unregister_lib(__tgt_bin_desc *desc);
|
|
|
|
|
|
|
|
// creates the host to target data mapping, stores it in the
|
|
|
|
// libomptarget.so internal structure (an entry in a stack of data maps) and
|
|
|
|
// passes the data to the device;
|
2017-11-22 02:26:41 +08:00
|
|
|
void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args, int64_t *arg_sizes,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_types);
|
|
|
|
void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t depNum, void *depList,
|
|
|
|
int32_t noAliasDepNum,
|
|
|
|
void *noAliasDepList);
|
2020-11-20 00:56:59 +08:00
|
|
|
void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
|
|
|
|
int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes,
|
|
|
|
int64_t *arg_types,
|
2020-11-19 04:38:25 +08:00
|
|
|
map_var_info_t *arg_names,
|
|
|
|
void **arg_mappers);
|
|
|
|
void __tgt_target_data_begin_nowait_mapper(
|
2020-11-20 00:56:59 +08:00
|
|
|
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
|
|
|
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
|
|
|
void *depList, int32_t noAliasDepNum, void *noAliasDepList);
|
2017-01-26 05:27:24 +08:00
|
|
|
|
|
|
|
// passes data from the target, release target memory and destroys the
|
|
|
|
// host-target mapping (top entry from the stack of data maps) created by
|
|
|
|
// the last __tgt_target_data_begin
|
2017-11-22 02:26:41 +08:00
|
|
|
void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes, int64_t *arg_types);
|
|
|
|
void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t depNum, void *depList,
|
|
|
|
int32_t noAliasDepNum, void *noAliasDepList);
|
2020-11-20 00:56:59 +08:00
|
|
|
void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
|
|
|
|
int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes,
|
|
|
|
int64_t *arg_types, map_var_info_t *arg_names,
|
2020-11-19 04:38:25 +08:00
|
|
|
void **arg_mappers);
|
2020-11-20 00:56:59 +08:00
|
|
|
void __tgt_target_data_end_nowait_mapper(
|
|
|
|
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
|
|
|
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
|
|
|
void *depList, int32_t noAliasDepNum, void *noAliasDepList);
|
2017-01-26 05:27:24 +08:00
|
|
|
|
|
|
|
/// passes data to/from the target
|
2017-11-22 02:26:41 +08:00
|
|
|
void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args, int64_t *arg_sizes,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_types);
|
|
|
|
void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t depNum, void *depList,
|
|
|
|
int32_t noAliasDepNum,
|
|
|
|
void *noAliasDepList);
|
2020-11-20 00:56:59 +08:00
|
|
|
void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
|
|
|
|
int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes,
|
|
|
|
int64_t *arg_types,
|
2020-11-19 04:38:25 +08:00
|
|
|
map_var_info_t *arg_names,
|
|
|
|
void **arg_mappers);
|
2020-07-16 04:24:03 +08:00
|
|
|
void __tgt_target_data_update_nowait_mapper(
|
2020-11-20 00:56:59 +08:00
|
|
|
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
|
|
|
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
|
|
|
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
|
|
|
void *depList, int32_t noAliasDepNum, void *noAliasDepList);
|
2017-01-26 05:27:24 +08:00
|
|
|
|
|
|
|
// Performs the same actions as data_begin in case arg_num is non-zero
|
|
|
|
// and initiates run of offloaded region on target platform; if arg_num
|
|
|
|
// is non-zero after the region execution is done it also performs the
|
|
|
|
// same action as data_end above. The following types are used; this
|
|
|
|
// function returns 0 if it was able to transfer the execution to a
|
|
|
|
// target and an int different from zero otherwise.
|
2017-11-22 02:26:41 +08:00
|
|
|
int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args, int64_t *arg_sizes,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_types);
|
|
|
|
int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args, int64_t *arg_sizes,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_types, int32_t depNum, void *depList,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t noAliasDepNum, void *noAliasDepList);
|
2020-11-20 00:56:59 +08:00
|
|
|
int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
|
|
|
int32_t arg_num, void **args_base, void **args,
|
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
|
|
|
map_var_info_t *arg_names, void **arg_mappers);
|
|
|
|
int __tgt_target_nowait_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
2020-07-16 04:24:03 +08:00
|
|
|
int32_t arg_num, void **args_base, void **args,
|
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
2020-11-19 04:38:25 +08:00
|
|
|
map_var_info_t *arg_names, void **arg_mappers,
|
2020-11-14 02:06:41 +08:00
|
|
|
int32_t depNum, void *depList,
|
|
|
|
int32_t noAliasDepNum, void *noAliasDepList);
|
2017-01-26 05:27:24 +08:00
|
|
|
|
2017-11-22 02:26:41 +08:00
|
|
|
int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
|
2017-01-26 05:27:24 +08:00
|
|
|
void **args_base, void **args, int64_t *arg_sizes,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_types, int32_t num_teams,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t thread_limit);
|
2017-11-22 02:26:41 +08:00
|
|
|
int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t arg_num, void **args_base, void **args,
|
2017-11-22 02:26:41 +08:00
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
2017-01-26 05:27:24 +08:00
|
|
|
int32_t num_teams, int32_t thread_limit,
|
|
|
|
int32_t depNum, void *depList,
|
|
|
|
int32_t noAliasDepNum, void *noAliasDepList);
|
2020-11-20 00:56:59 +08:00
|
|
|
int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
2020-07-16 04:24:03 +08:00
|
|
|
int32_t arg_num, void **args_base, void **args,
|
|
|
|
int64_t *arg_sizes, int64_t *arg_types,
|
2020-11-19 04:38:25 +08:00
|
|
|
map_var_info_t *arg_names, void **arg_mappers,
|
2020-11-14 02:06:41 +08:00
|
|
|
int32_t num_teams, int32_t thread_limit);
|
2020-07-16 04:24:03 +08:00
|
|
|
int __tgt_target_teams_nowait_mapper(
|
2020-11-20 00:56:59 +08:00
|
|
|
ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
|
|
|
|
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
2020-11-19 04:38:25 +08:00
|
|
|
map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams,
|
|
|
|
int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
|
|
|
|
void *noAliasDepList);
|
2020-07-16 04:24:03 +08:00
|
|
|
|
2021-03-11 02:25:33 +08:00
|
|
|
void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
|
|
|
|
|
|
|
|
void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
|
|
|
|
uint64_t loop_tripcount);
|
2017-01-26 05:27:24 +08:00
|
|
|
|
2021-04-22 05:31:09 +08:00
|
|
|
// Set the runtime's info flag. The parameter was unnamed in the prototype;
// the name here is for readability only.
void __tgt_set_info_flag(uint32_t flag);

// Print information about the device identified by device_id.
// NOTE(review): the meaning of the int result is not visible in this header.
int __tgt_print_device_info(int64_t device_id);
|
2017-01-26 05:27:24 +08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Linkage prefix for the runtime's entry points: C linkage when compiled as
// C++, plain external linkage otherwise.
#ifdef __cplusplus
#define EXTERN extern "C"
#else
#define EXTERN extern
#endif
|
|
|
|
|
|
|
|
#endif // _OMPTARGET_H_
|