[OpenMP][Tool] Add Archer option to disable data race analysis for sequential part

This introduces the new `ARCHER_OPTIONS` flag `ignore_serial=0|1` to disable
analysis and logging of memory accesses in the sequential part of the OpenMP
application.

In the sequential part of an OpenMP program no data race is possible, unless
there is non-OpenMP concurrency (such as pthreads, MPI, ...). Because such
concurrency cannot be ruled out, this option is not active by default.

Besides reducing the runtime overhead for the sequential part of the program,
this also reduces the memory overhead for sequential initialization. In
combination with `flush_shadow=1`, this can allow analysis of applications that
run close to the limit of available memory but only access smaller parts of
shared memory during each OpenMP parallel region.

A problem with this approach is that Archer only becomes active when the OpenMP
runtime is initialized, which might happen after the serial initialization of
the application. In such a case, it helps to call, for example,
`omp_get_max_threads()` at the beginning of main.

Differential Revision: https://reviews.llvm.org/D90473
This commit is contained in:
Joachim Protze 2020-11-02 16:34:46 +01:00
parent fe9dc2e54a
commit fdc9dfc8e4
11 changed files with 63 additions and 30 deletions

View File

@ -104,7 +104,11 @@ different flags are separated by spaces, e.g.:
<tr>
<td class="org-left">flush&#95;shadow</td>
<td class="org-right">0</td>
<td class="org-left">Flush shadow memory at the end of an outer OpenMP parallel region. Our experiments show that this can reduce memory overhead by ~30% and runtime overhead by ~10%. This flag is useful for large OpenMP applications that typically require large amounts of memory, causing out-of-memory exceptions when checked by Archer.</td>
<td class="org-left">Flush shadow memory at the end of an outer OpenMP
parallel region. Our experiments show that this can reduce memory overhead
by ~30% and runtime overhead by ~10%. This flag is useful for large OpenMP
applications that typically require large amounts of memory, causing
out-of-memory exceptions when checked by Archer.</td>
</tr>
</tbody>
@ -116,6 +120,17 @@ different flags are separated by spaces, e.g.:
</tr>
</tbody>
<tbody>
<tr>
<td class="org-left">ignore&#95;serial</td>
<td class="org-right">0</td>
<td class="org-left">Turn off tracking and analysis of memory accesses in
the sequential part of an OpenMP program. (Only effective once the OpenMP
runtime is initialized. If in doubt, insert omp_get_max_threads() as the first
statement in main!)</td>
</tr>
</tbody>
<tbody>
<tr>
<td class="org-left">verbose</td>

View File

@ -56,18 +56,14 @@ static int hasReductionCallback;
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
int flush_shadow;
int flush_shadow{0};
#endif
int print_max_rss;
int verbose;
int enabled;
int print_max_rss{0};
int verbose{0};
int enabled{1};
int ignore_serial{0};
ArcherFlags(const char *env)
:
#if (LLVM_VERSION) >= 40
flush_shadow(0),
#endif
print_max_rss(0), verbose(0), enabled(1) {
ArcherFlags(const char *env) {
if (env) {
std::vector<std::string> tokens;
std::string token;
@ -88,6 +84,8 @@ public:
continue;
if (sscanf(it->c_str(), "enable=%d", &enabled))
continue;
if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
continue;
std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token
<< std::endl;
}
@ -410,7 +408,7 @@ struct TaskData {
bool InBarrier;
/// Whether this task is an included task.
bool Included;
int TaskType{0};
/// Index of which barrier to use next.
char BarrierIndex;
@ -443,8 +441,8 @@ struct TaskData {
int execution;
int freed;
TaskData(TaskData *Parent)
: InBarrier(false), Included(false), BarrierIndex(0), RefCount(1),
TaskData(TaskData *Parent, int taskType)
: InBarrier(false), TaskType(taskType), BarrierIndex(0), RefCount(1),
Parent(Parent), ImplicitTask(nullptr), Team(Parent->Team),
TaskGroup(nullptr), DependencyCount(0), execution(0), freed(0) {
if (Parent != nullptr) {
@ -455,8 +453,8 @@ struct TaskData {
}
}
TaskData(ParallelData *Team = nullptr)
: InBarrier(false), Included(false), BarrierIndex(0), RefCount(1),
TaskData(ParallelData *Team, int taskType)
: InBarrier(false), TaskType(taskType), BarrierIndex(0), RefCount(1),
Parent(nullptr), ImplicitTask(this), Team(Team), TaskGroup(nullptr),
DependencyCount(0), execution(1), freed(0) {}
@ -465,6 +463,17 @@ struct TaskData {
TsanDeleteClock(&Taskwait);
}
bool isIncluded() { return TaskType & ompt_task_undeferred; }
bool isUntied() { return TaskType & ompt_task_untied; }
bool isFinal() { return TaskType & ompt_task_final; }
bool isMergable() { return TaskType & ompt_task_mergeable; }
bool isMerged() { return TaskType & ompt_task_merged; }
bool isExplicit() { return TaskType & ompt_task_explicit; }
bool isImplicit() { return TaskType & ompt_task_implicit; }
bool isInitial() { return TaskType & ompt_task_initial; }
bool isTarget() { return TaskType & ompt_task_target; }
void *GetTaskPtr() { return &Task; }
void *GetTaskwaitPtr() { return &Taskwait; }
@ -517,11 +526,15 @@ static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
parallel_data->ptr = Data;
TsanHappensBefore(Data->GetParallelPtr());
if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
TsanIgnoreWritesEnd();
}
static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
ompt_data_t *task_data, int flag,
const void *codeptr_ra) {
if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
TsanIgnoreWritesBegin();
ParallelData *Data = ToParallelData(parallel_data);
TsanHappensAfter(Data->GetBarrierPtr(0));
TsanHappensAfter(Data->GetBarrierPtr(1));
@ -546,7 +559,7 @@ static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
if (type & ompt_task_initial) {
parallel_data->ptr = new ParallelData(nullptr);
}
task_data->ptr = new TaskData(ToParallelData(parallel_data));
task_data->ptr = new TaskData(ToParallelData(parallel_data), type);
TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
break;
@ -727,14 +740,13 @@ static void ompt_tsan_task_create(
ParallelData *PData = new ParallelData(nullptr);
parallel_data->ptr = PData;
Data = new TaskData(PData);
Data = new TaskData(PData, type);
new_task_data->ptr = Data;
} else if (type & ompt_task_undeferred) {
Data = new TaskData(ToTaskData(parent_task_data));
Data = new TaskData(ToTaskData(parent_task_data), type);
new_task_data->ptr = Data;
Data->Included = true;
} else if (type & ompt_task_explicit || type & ompt_task_target) {
Data = new TaskData(ToTaskData(parent_task_data));
Data = new TaskData(ToTaskData(parent_task_data), type);
new_task_data->ptr = Data;
// Use the newly created address. We cannot use a single address from the
@ -801,7 +813,7 @@ static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
prior_task_status == ompt_task_late_fulfill) {
// Included tasks are executed sequentially, no need to track
// synchronization
if (!FromTask->Included) {
if (!FromTask->isIncluded()) {
// Task will finish before a barrier in the surrounding parallel region
// ...
ParallelData *PData = FromTask->Team;
@ -976,10 +988,14 @@ static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
"Warning: please export "
"TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
"to avoid false positive reports from the OpenMP runtime!\n");
if (archer_flags->ignore_serial)
TsanIgnoreWritesBegin();
return 1; // success
}
static void ompt_tsan_finalize(ompt_data_t *tool_data) {
if (archer_flags->ignore_serial)
TsanIgnoreWritesEnd();
if (archer_flags->print_max_rss) {
struct rusage end;
getrusage(RUSAGE_SELF, &end);

View File

@ -91,6 +91,8 @@ if 'INTEL_LICENSE_FILE' in os.environ:
config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']
# Race Tests
config.substitutions.append(("%libarcher-compile-and-run-race-noserial", \
"%libarcher-compile && env ARCHER_OPTIONS=ignore_serial=1 %libarcher-run-race"))
config.substitutions.append(("%libarcher-compile-and-run-race", \
"%libarcher-compile && %libarcher-run-race"))
config.substitutions.append(("%libarcher-compile-and-run-nosuppression", \

View File

@ -1,7 +1,6 @@
/*
* critical-unrelated.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include <omp.h>
#include <stdio.h>

View File

@ -1,7 +1,6 @@
/*
* lock-nested-unrelated.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include <omp.h>
#include <stdio.h>

View File

@ -1,7 +1,6 @@
/*
* lock-unrelated.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include <omp.h>
#include <stdio.h>

View File

@ -1,7 +1,6 @@
/*
* parallel-simple.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include <omp.h>
#include <stdio.h>

View File

@ -1,7 +1,6 @@
/*
* task-dependency.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include "ompt/ompt-signal.h"
#include <omp.h>

View File

@ -1,7 +1,6 @@
/*
* task-taskgroup-unrelated.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include "ompt/ompt-signal.h"
#include <omp.h>

View File

@ -1,7 +1,6 @@
/*
* task-taskwait-nested.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include "ompt/ompt-signal.h"
#include <omp.h>

View File

@ -1,7 +1,6 @@
/*
* task-two.c -- Archer testcase
*/
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile-and-run-race | FileCheck %s
// RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
// REQUIRES: tsan
#include <omp.h>
#include <stdio.h>