forked from OSchip/llvm-project
[OpenMP] Begin Printing Information Dumps In Libomptarget and Plugins
Summary: This patch starts adding support for adding information dumps to libomptarget and rtl plugins. The information printing is controlled by the LIBOMPTARGET_INFO environment variable introduced in D86483. The goal of this patch is to provide the user with additional information about the device during kernel execution and providing the user with information dumps in the case of failure. This patch added the ability to dump the pointer mapping table as well as printing the number of blocks and threads in the cuda RTL. Reviewers: jdoerfort gkistanova ye-luo Subscribers: guansong openmp-commits sstefan1 yaxunl ye-luo Tags: #OpenMP Differential Revision: https://reviews.llvm.org/D87165
This commit is contained in:
parent
6e45b98934
commit
ae209397b1
|
@ -70,23 +70,26 @@ static inline int getDebugLevel() {
|
|||
#define GETNAME2(name) #name
|
||||
#define GETNAME(name) GETNAME2(name)
|
||||
|
||||
// Messaging interface
|
||||
/// Print a generic message string from libomptarget or a plugin RTL
|
||||
#define MESSAGE0(_str) \
|
||||
do { \
|
||||
fprintf(stderr, GETNAME(TARGET_NAME) " message: %s\n", _str); \
|
||||
} while (0)
|
||||
|
||||
/// Print a printf formatting string message from libomptarget or a plugin RTL
|
||||
#define MESSAGE(_str, ...) \
|
||||
do { \
|
||||
fprintf(stderr, GETNAME(TARGET_NAME) " message: " _str "\n", __VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
/// Print fatal error message with an error string and error identifier
|
||||
#define FATAL_MESSAGE0(_num, _str) \
|
||||
do { \
|
||||
fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d: %s\n", _num, _str); \
|
||||
abort(); \
|
||||
} while (0)
|
||||
|
||||
/// Print fatal error message with a printf string and error identifier
|
||||
#define FATAL_MESSAGE(_num, _str, ...) \
|
||||
do { \
|
||||
fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d:" _str "\n", _num, \
|
||||
|
@ -94,12 +97,20 @@ static inline int getDebugLevel() {
|
|||
abort(); \
|
||||
} while (0)
|
||||
|
||||
/// Print a generic error string from libomptarget or a plugin RTL
|
||||
#define FAILURE_MESSAGE(...) \
|
||||
do { \
|
||||
fprintf(stderr, GETNAME(TARGET_NAME) " error: "); \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
/// Print a generic information string used if LIBOMPTARGET_INFO=1
|
||||
#define INFO_MESSAGE(_num, ...) \
|
||||
do { \
|
||||
fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
// Debugging messages
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
#include <stdio.h>
|
||||
|
@ -110,6 +121,7 @@ static inline int getDebugLevel() {
|
|||
fprintf(stderr, __VA_ARGS__); \
|
||||
}
|
||||
|
||||
/// Emit a message for debugging
|
||||
#define DP(...) \
|
||||
do { \
|
||||
if (getDebugLevel() > 0) { \
|
||||
|
@ -117,6 +129,7 @@ static inline int getDebugLevel() {
|
|||
} \
|
||||
} while (false)
|
||||
|
||||
/// Emit a message for debugging or failure if debugging is disabled
|
||||
#define REPORT(...) \
|
||||
do { \
|
||||
if (getDebugLevel() > 0) { \
|
||||
|
@ -133,4 +146,14 @@ static inline int getDebugLevel() {
|
|||
#define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__);
|
||||
#endif // OMPTARGET_DEBUG
|
||||
|
||||
/// Emit a message giving the user extra information about the runtime if
|
||||
#define INFO(_id, ...) \
|
||||
do { \
|
||||
if (getDebugLevel() > 0) { \
|
||||
DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \
|
||||
} else if (getInfoLevel() > 0) { \
|
||||
INFO_MESSAGE(_id, __VA_ARGS__); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#endif // _OMPTARGET_DEBUG_H
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#ifdef OMPTARGET_DEBUG
|
||||
#define CUDA_ERR_STRING(err) \
|
||||
do { \
|
||||
if (getDebugLevel() > 0) { \
|
||||
if (getDebugLevel() > 0) { \
|
||||
const char *errStr; \
|
||||
cuGetErrorString(err, &errStr); \
|
||||
DP("CUDA error is: %s\n", errStr); \
|
||||
|
@ -277,14 +277,15 @@ class DeviceRTLTy {
|
|||
E.Entries.push_back(entry);
|
||||
}
|
||||
|
||||
// Return true if the entry is associated with device
|
||||
bool findOffloadEntry(const int DeviceId, const void *Addr) const {
|
||||
// Return a pointer to the entry associated with the pointer
|
||||
const __tgt_offload_entry *getOffloadEntry(const int DeviceId,
|
||||
const void *Addr) const {
|
||||
for (const __tgt_offload_entry &Itr :
|
||||
DeviceData[DeviceId].FuncGblEntries.back().Entries)
|
||||
if (Itr.addr == Addr)
|
||||
return true;
|
||||
return &Itr;
|
||||
|
||||
return false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Return the pointer to the target entries table
|
||||
|
@ -492,9 +493,11 @@ public:
|
|||
DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit;
|
||||
}
|
||||
|
||||
DP("Max number of CUDA blocks %d, threads %d & warp size %d\n",
|
||||
DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock,
|
||||
DeviceData[DeviceId].WarpSize);
|
||||
INFO(DeviceId,
|
||||
"Device supports up to %d CUDA blocks and %d threads with a "
|
||||
"warp size of %d\n",
|
||||
DeviceData[DeviceId].BlocksPerGrid,
|
||||
DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);
|
||||
|
||||
// Set default number of teams
|
||||
if (EnvNumTeams > 0) {
|
||||
|
@ -926,9 +929,14 @@ public:
|
|||
CudaBlocksPerGrid = TeamNum;
|
||||
}
|
||||
|
||||
// Run on the device.
|
||||
DP("Launch kernel with %d blocks and %d threads\n", CudaBlocksPerGrid,
|
||||
CudaThreadsPerBlock);
|
||||
INFO(DeviceId,
|
||||
"Launching kernel %s with %d blocks and %d threads in %s "
|
||||
"mode\n",
|
||||
(getOffloadEntry(DeviceId, TgtEntryPtr))
|
||||
? getOffloadEntry(DeviceId, TgtEntryPtr)->name
|
||||
: "(null)",
|
||||
CudaBlocksPerGrid, CudaThreadsPerBlock,
|
||||
(KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic");
|
||||
|
||||
CUstream Stream = getStream(DeviceId, AsyncInfo);
|
||||
Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1,
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "rtl.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
|
||||
|
@ -24,8 +25,22 @@ kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
|
|||
std::mutex TargetOffloadMtx;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// manage the success or failure of a target construct
|
||||
/// dump a table of all the host-target pointer pairs on failure
|
||||
static void dumpTargetPointerMappings() {
|
||||
for (const auto &Device : Devices) {
|
||||
fprintf(stderr, "Device %d:\n", Device.DeviceID);
|
||||
fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)");
|
||||
for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
|
||||
fprintf(stderr, DPxMOD " " DPxMOD " %lu\n",
|
||||
DPxPTR(HostTargetMap.HstPtrBegin),
|
||||
DPxPTR(HostTargetMap.TgtPtrBegin),
|
||||
HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// manage the success or failure of a target construct
|
||||
static void HandleDefaultTargetOffload() {
|
||||
TargetOffloadMtx.lock();
|
||||
if (TargetOffloadPolicy == tgt_default) {
|
||||
|
@ -60,8 +75,11 @@ static void HandleTargetOutcome(bool success) {
|
|||
break;
|
||||
case tgt_mandatory:
|
||||
if (!success) {
|
||||
if (getInfoLevel() > 0)
|
||||
MESSAGE0("LIBOMPTARGET_INFO is not supported yet");
|
||||
if (getInfoLevel() > 1)
|
||||
dumpTargetPointerMappings();
|
||||
else
|
||||
FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump tables\n");
|
||||
|
||||
FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory");
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
|
||||
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
||||
int main() {
|
||||
int ptr = 1;
|
||||
|
||||
// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}}
|
||||
// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
|
||||
#pragma omp target map(tofrom:ptr)
|
||||
{ptr = 1;}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue