2017-05-10 22:12:36 +08:00
|
|
|
//===-- omptargetplugin.h - Target dependent OpenMP Plugin API --*- C++ -*-===//
|
|
|
|
//
|
2019-01-19 18:56:40 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-05-10 22:12:36 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file defines an interface between target independent OpenMP offload
|
|
|
|
// runtime library libomptarget and target dependent plugin.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef _OMPTARGETPLUGIN_H_
|
|
|
|
#define _OMPTARGETPLUGIN_H_
|
|
|
|
|
|
|
|
#include <omptarget.h>
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Return the number of available devices of the type supported by the
// target RTL.
int32_t __tgt_rtl_number_of_devices(void);
|
|
|
|
|
|
|
|
// Return an integer different from zero if the provided device image can be
|
|
|
|
// supported by the runtime. The functionality is similar to comparing the
|
|
|
|
// result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
|
|
|
|
// lightweight query to determine if the RTL is suitable for an image without
|
|
|
|
// having to load the library, which can be expensive.
|
|
|
|
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
|
|
|
|
|
2020-06-05 04:58:37 +08:00
|
|
|
// Return an integer other than zero if the data can be exchanged from SrcDevId
// to DstDevId. If the data is exchangeable, the device plugin should provide a
// function to move data from the source device to the destination device
// directly.
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId);
|
|
|
|
|
2021-02-25 01:14:35 +08:00
|
|
|
// Return an integer other than zero if the plugin can handle images which do
// not contain target regions and global variables (but can contain other
// functions).
int32_t __tgt_rtl_supports_empty_images(void);
|
|
|
|
|
2019-06-04 23:05:53 +08:00
|
|
|
// Initialize the requires flags for the device.
int64_t __tgt_rtl_init_requires(int64_t RequiresFlags);
|
|
|
|
|
2017-05-10 22:12:36 +08:00
|
|
|
// Initialize the specified device. In case of success return 0; otherwise
// return an error code.
int32_t __tgt_rtl_init_device(int32_t ID);
|
|
|
|
|
|
|
|
// Pass an executable image section described by image to the specified
|
|
|
|
// device and prepare an address table of target entities. In case of error,
|
|
|
|
// return NULL. Otherwise, return a pointer to the built address table.
|
|
|
|
// Individual entries in the table may also be NULL, when the corresponding
|
|
|
|
// offload region is not supported on the target device.
|
|
|
|
__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
|
|
|
|
__tgt_device_image *Image);
|
|
|
|
|
|
|
|
// Allocate data on the particular target device, of the specified size.
// HostPtr is an address of the host data the allocated target data
// will be associated with (HostPtr may be NULL if it is not known at
// allocation time, like for example it would be for target data that
// is allocated by omp_target_alloc() API). Return address of the
// allocated data on the target that will be used by libomptarget.so to
// initialize the target data mapping structures. These addresses are
// used to generate a table of target variables to pass to
// __tgt_rtl_run_target_region(). The __tgt_rtl_data_alloc() returns NULL in
// case an error occurred on the target device. Kind dictates what allocator
// to use (e.g. shared, host, device).
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
                           int32_t Kind);
|
2017-05-10 22:12:36 +08:00
|
|
|
|
2020-04-10 10:40:30 +08:00
|
|
|
// Pass the data content to the target device using the target address. In case
// of success, return zero. Otherwise, return an error code.
int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
                              int64_t Size);
|
2017-05-10 22:12:36 +08:00
|
|
|
|
2020-04-10 10:40:30 +08:00
|
|
|
// Asynchronous version of __tgt_rtl_data_submit
int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
                                    int64_t Size, __tgt_async_info *AsyncInfo);
|
2020-04-10 10:40:30 +08:00
|
|
|
|
|
|
|
// Retrieve the data content from the target device using its address. In case
// of success, return zero. Otherwise, return an error code.
int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
                                int64_t Size);
|
|
|
|
|
|
|
|
// Asynchronous version of __tgt_rtl_data_retrieve
|
|
|
|
int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
|
|
|
|
void *TargetPtr, int64_t Size,
|
2021-02-11 04:04:37 +08:00
|
|
|
__tgt_async_info *AsyncInfo);
|
2017-05-10 22:12:36 +08:00
|
|
|
|
2020-06-05 04:58:37 +08:00
|
|
|
// Copy the data content from one target device to another target device using
// its address. This operation does not need to copy data back to host and then
// from host to another device. In case of success, return zero. Otherwise,
// return an error code.
int32_t __tgt_rtl_data_exchange(int32_t SrcID, void *SrcPtr, int32_t DstID,
                                void *DstPtr, int64_t Size);
|
|
|
|
|
|
|
|
// Asynchronous version of __tgt_rtl_data_exchange
|
|
|
|
int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr,
|
|
|
|
int32_t DesID, void *DstPtr, int64_t Size,
|
2021-02-11 04:04:37 +08:00
|
|
|
__tgt_async_info *AsyncInfo);
|
2020-06-05 04:58:37 +08:00
|
|
|
|
2017-05-10 22:12:36 +08:00
|
|
|
// De-allocate the data referenced by target ptr on the device. In case of
// success, return zero. Otherwise, return an error code.
int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
|
|
|
|
|
|
|
|
// Transfer control to the offloaded entry Entry on the target device.
// Args and Offsets are arrays of NumArgs size of target addresses and
// offsets. An offset should be added to the target address before passing it
// to the outlined function on device side. If AsyncInfo is nullptr, it is
// synchronous; otherwise it is asynchronous. However, AsyncInfo may be
// ignored on some platforms, like x86_64. In that case, it is synchronous. In
// case of success, return zero. Otherwise, return an error code.
// NOTE(review): this declaration takes no AsyncInfo parameter; the AsyncInfo
// remarks presumably describe __tgt_rtl_run_target_region_async -- confirm.
int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
                                    ptrdiff_t *Offsets, int32_t NumArgs);
|
|
|
|
|
|
|
|
// Asynchronous version of __tgt_rtl_run_target_region
|
|
|
|
int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
|
|
|
|
ptrdiff_t *Offsets, int32_t NumArgs,
|
2021-02-11 04:04:37 +08:00
|
|
|
__tgt_async_info *AsyncInfo);
|
2017-05-10 22:12:36 +08:00
|
|
|
|
|
|
|
// Similar to __tgt_rtl_run_target_region, but additionally specify the
// number of teams to be created and a number of threads in each team. If
// AsyncInfo is nullptr, it is synchronous; otherwise it is asynchronous.
// However, AsyncInfo may be ignored on some platforms, like x86_64. In that
// case, it is synchronous.
// NOTE(review): this declaration takes no AsyncInfo parameter; the AsyncInfo
// remarks presumably describe __tgt_rtl_run_target_team_region_async --
// confirm.
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
                                         ptrdiff_t *Offsets, int32_t NumArgs,
                                         int32_t NumTeams, int32_t ThreadLimit,
                                         uint64_t loop_tripcount);
|
|
|
|
|
|
|
|
// Asynchronous version of __tgt_rtl_run_target_team_region
|
|
|
|
int32_t __tgt_rtl_run_target_team_region_async(
|
|
|
|
int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
|
|
|
|
int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
|
2021-02-11 04:04:37 +08:00
|
|
|
__tgt_async_info *AsyncInfo);
|
[OpenMP] Optimized stream selection by scheduling data mapping for the same target region into a same stream
Summary:
This patch introduces two things for offloading:
1. Asynchronous data transferring: those functions are suffix with `_async`. They have one more argument compared with their synchronous counterparts: `__tgt_async_info*`, which is a new struct that only has one field, `void *Identifier`. This struct is for information exchange between different asynchronous operations. It can be used for stream selection, like in this case, or operation synchronization, which is also used. We may expect more usages in the future.
2. Optimization of stream selection for data mapping. Previous implementation was using asynchronous device memory transfer but synchronizing after each memory transfer. Actually, if we say kernel A needs four memory copy to device and two memory copy back to host, then we can schedule these seven operations (four H2D, two D2H, and one kernel launch) into a same stream and just need synchronization after memory copy from device to host. In this way, we can save a huge overhead compared with synchronization after each operation.
Reviewers: jdoerfert, ye-luo
Reviewed By: jdoerfert
Subscribers: yaxunl, lildmh, guansong, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D77005
2020-04-08 02:51:56 +08:00
|
|
|
|
|
|
|
// Device synchronization. In case of success, return zero. Otherwise, return an
|
|
|
|
// error code.
|
2021-02-11 04:04:37 +08:00
|
|
|
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
|
2017-05-10 22:12:36 +08:00
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif // _OMPTARGETPLUGIN_H_
|