[OpenMP] Begin Printing Information Dumps In Libomptarget and Plugins

Summary:
This patch starts adding support for adding information dumps to libomptarget
and rtl plugins. The information printing is controlled by the
LIBOMPTARGET_INFO environment variable introduced in D86483. The goal of this
patch is to provide the user with additional information about the device
during kernel execution and providing the user with information dumps in the
case of failure. This patch added the ability to dump the pointer mapping table
as well as printing the number of blocks and threads in the cuda RTL.

Reviewers: jdoerfort gkistanova	ye-luo

Subscribers: guansong openmp-commits sstefan1 yaxunl ye-luo

Tags: #OpenMP

Differential Revision: https://reviews.llvm.org/D87165
This commit is contained in:
Joseph Huber 2020-09-04 15:03:49 -04:00 committed by Huber, Joseph
parent 6e45b98934
commit ae209397b1
4 changed files with 79 additions and 15 deletions

View File

@ -70,23 +70,26 @@ static inline int getDebugLevel() {
#define GETNAME2(name) #name
#define GETNAME(name) GETNAME2(name)
// Messaging interface
/// Print a generic message string from libomptarget or a plugin RTL
#define MESSAGE0(_str) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " message: %s\n", _str); \
} while (0)
/// Print a printf formatting string message from libomptarget or a plugin RTL
#define MESSAGE(_str, ...) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " message: " _str "\n", __VA_ARGS__); \
} while (0)
/// Print fatal error message with an error string and error identifier
#define FATAL_MESSAGE0(_num, _str) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d: %s\n", _num, _str); \
abort(); \
} while (0)
/// Print fatal error message with a printf string and error identifier
#define FATAL_MESSAGE(_num, _str, ...) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d:" _str "\n", _num, \
@ -94,12 +97,20 @@ static inline int getDebugLevel() {
abort(); \
} while (0)
/// Print a generic error string from libomptarget or a plugin RTL
#define FAILURE_MESSAGE(...) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " error: "); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
/// Print a generic information string used if LIBOMPTARGET_INFO=1
#define INFO_MESSAGE(_num, ...) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \
fprintf(stderr, __VA_ARGS__); \
} while (0)
// Debugging messages
#ifdef OMPTARGET_DEBUG
#include <stdio.h>
@ -110,6 +121,7 @@ static inline int getDebugLevel() {
fprintf(stderr, __VA_ARGS__); \
}
/// Emit a message for debugging
#define DP(...) \
do { \
if (getDebugLevel() > 0) { \
@ -117,6 +129,7 @@ static inline int getDebugLevel() {
} \
} while (false)
/// Emit a message for debugging or failure if debugging is disabled
#define REPORT(...) \
do { \
if (getDebugLevel() > 0) { \
@ -133,4 +146,14 @@ static inline int getDebugLevel() {
#define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__);
#endif // OMPTARGET_DEBUG
/// Emit a message giving the user extra information about the runtime if
#define INFO(_id, ...) \
do { \
if (getDebugLevel() > 0) { \
DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \
} else if (getInfoLevel() > 0) { \
INFO_MESSAGE(_id, __VA_ARGS__); \
} \
} while (false)
#endif // _OMPTARGET_DEBUG_H

View File

@ -29,7 +29,7 @@
#ifdef OMPTARGET_DEBUG
#define CUDA_ERR_STRING(err) \
do { \
if (getDebugLevel() > 0) { \
if (getDebugLevel() > 0) { \
const char *errStr; \
cuGetErrorString(err, &errStr); \
DP("CUDA error is: %s\n", errStr); \
@ -277,14 +277,15 @@ class DeviceRTLTy {
E.Entries.push_back(entry);
}
// Return true if the entry is associated with device
bool findOffloadEntry(const int DeviceId, const void *Addr) const {
// Return a pointer to the entry associated with the pointer
const __tgt_offload_entry *getOffloadEntry(const int DeviceId,
const void *Addr) const {
for (const __tgt_offload_entry &Itr :
DeviceData[DeviceId].FuncGblEntries.back().Entries)
if (Itr.addr == Addr)
return true;
return &Itr;
return false;
return nullptr;
}
// Return the pointer to the target entries table
@ -492,9 +493,11 @@ public:
DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit;
}
DP("Max number of CUDA blocks %d, threads %d & warp size %d\n",
DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock,
DeviceData[DeviceId].WarpSize);
INFO(DeviceId,
"Device supports up to %d CUDA blocks and %d threads with a "
"warp size of %d\n",
DeviceData[DeviceId].BlocksPerGrid,
DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);
// Set default number of teams
if (EnvNumTeams > 0) {
@ -926,9 +929,14 @@ public:
CudaBlocksPerGrid = TeamNum;
}
// Run on the device.
DP("Launch kernel with %d blocks and %d threads\n", CudaBlocksPerGrid,
CudaThreadsPerBlock);
INFO(DeviceId,
"Launching kernel %s with %d blocks and %d threads in %s "
"mode\n",
(getOffloadEntry(DeviceId, TgtEntryPtr))
? getOffloadEntry(DeviceId, TgtEntryPtr)->name
: "(null)",
CudaBlocksPerGrid, CudaThreadsPerBlock,
(KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic");
CUstream Stream = getStream(DeviceId, AsyncInfo);
Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1,

View File

@ -16,6 +16,7 @@
#include "rtl.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <mutex>
@ -24,8 +25,22 @@ kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
std::mutex TargetOffloadMtx;
////////////////////////////////////////////////////////////////////////////////
/// manage the success or failure of a target construct
/// dump a table of all the host-target pointer pairs on failure
static void dumpTargetPointerMappings() {
for (const auto &Device : Devices) {
fprintf(stderr, "Device %d:\n", Device.DeviceID);
fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)");
for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
fprintf(stderr, DPxMOD " " DPxMOD " %lu\n",
DPxPTR(HostTargetMap.HstPtrBegin),
DPxPTR(HostTargetMap.TgtPtrBegin),
HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin);
}
}
}
////////////////////////////////////////////////////////////////////////////////
/// manage the success or failure of a target construct
static void HandleDefaultTargetOffload() {
TargetOffloadMtx.lock();
if (TargetOffloadPolicy == tgt_default) {
@ -60,8 +75,11 @@ static void HandleTargetOutcome(bool success) {
break;
case tgt_mandatory:
if (!success) {
if (getInfoLevel() > 0)
MESSAGE0("LIBOMPTARGET_INFO is not supported yet");
if (getInfoLevel() > 1)
dumpTargetPointerMappings();
else
FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump tables\n");
FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory");
}
break;

View File

@ -0,0 +1,15 @@
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
#include <stdio.h>
#include <omp.h>
int main() {
int ptr = 1;
// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}}
// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
#pragma omp target map(tofrom:ptr)
{ptr = 1;}
return 0;
}