forked from OSchip/llvm-project
[OpenMP][Opt] Annotate known runtime functions and deduplicate more
This adds ~27 more runtime calls to the OpenMPKinds.def file, all with attributes. We automatically deduplicate 16 of them within function scope (which here coincides with thread scope), and we automatically annotate all of them during the OpenMPOpt discovery step. A test covering all omp_XXXX runtime calls is included to track annotation coverage. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D69984
This commit is contained in:
parent
8df173f399
commit
e28936f613
|
@ -176,6 +176,34 @@ __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
|
||||||
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
|
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
|
||||||
|
|
||||||
__OMP_RTL(omp_get_thread_num, false, Int32, )
|
__OMP_RTL(omp_get_thread_num, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_num_threads, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_max_threads, false, Int32, )
|
||||||
|
__OMP_RTL(omp_in_parallel, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_dynamic, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_cancellation, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_nested, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_schedule, false, Void, Int32Ptr, Int32Ptr)
|
||||||
|
__OMP_RTL(omp_get_thread_limit, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_supported_active_levels, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_max_active_levels, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_level, false, Int32, )
|
||||||
|
// OpenMP API prototype: int omp_get_ancestor_thread_num(int level);
// The routine takes the nesting level as its single Int32 argument.
__OMP_RTL(omp_get_ancestor_thread_num, false, Int32, Int32)
|
||||||
|
// OpenMP API prototype: int omp_get_team_size(int level);
// The routine takes the nesting level as its single Int32 argument.
__OMP_RTL(omp_get_team_size, false, Int32, Int32)
|
||||||
|
__OMP_RTL(omp_get_active_level, false, Int32, )
|
||||||
|
__OMP_RTL(omp_in_final, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_proc_bind, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_num_places, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_num_procs, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_place_proc_ids, false, Void, Int32, Int32Ptr)
|
||||||
|
__OMP_RTL(omp_get_place_num, false, Int32, )
|
||||||
|
__OMP_RTL(omp_get_partition_num_places, false, Int32, )
|
||||||
|
// OpenMP API prototype: void omp_get_partition_place_nums(int *place_nums);
// The result is written through the pointer argument; nothing is returned.
__OMP_RTL(omp_get_partition_place_nums, false, Void, Int32Ptr)
|
||||||
|
|
||||||
|
__OMP_RTL(omp_set_num_threads, false, Void, Int32)
|
||||||
|
__OMP_RTL(omp_set_dynamic, false, Void, Int32)
|
||||||
|
__OMP_RTL(omp_set_nested, false, Void, Int32)
|
||||||
|
__OMP_RTL(omp_set_schedule, false, Void, Int32, Int32)
|
||||||
|
__OMP_RTL(omp_set_max_active_levels, false, Void, Int32)
|
||||||
|
|
||||||
__OMP_RTL(__last, false, Void, )
|
__OMP_RTL(__last, false, Void, )
|
||||||
|
|
||||||
|
@ -197,6 +225,16 @@ __OMP_ATTRS_SET(GetterAttrs,
|
||||||
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly),
|
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly),
|
||||||
EnumAttr(NoSync), EnumAttr(NoFree))
|
EnumAttr(NoSync), EnumAttr(NoFree))
|
||||||
: AttributeSet(EnumAttr(NoUnwind)))
|
: AttributeSet(EnumAttr(NoUnwind)))
|
||||||
|
__OMP_ATTRS_SET(GetterArgWriteAttrs,
|
||||||
|
OptimisticAttributes
|
||||||
|
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
|
||||||
|
EnumAttr(NoFree))
|
||||||
|
: AttributeSet(EnumAttr(NoUnwind)))
|
||||||
|
__OMP_ATTRS_SET(SetterAttrs,
|
||||||
|
OptimisticAttributes
|
||||||
|
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly),
|
||||||
|
EnumAttr(NoSync), EnumAttr(NoFree))
|
||||||
|
: AttributeSet(EnumAttr(NoUnwind)))
|
||||||
|
|
||||||
#undef __OMP_ATTRS_SET
|
#undef __OMP_ATTRS_SET
|
||||||
#undef OMP_ATTRS_SET
|
#undef OMP_ATTRS_SET
|
||||||
|
@ -213,6 +251,41 @@ __OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
|
||||||
|
|
||||||
__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
|
__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
|
||||||
__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
|
__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(),
|
||||||
|
ArrayRef<AttributeSet>(
|
||||||
|
{AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)),
|
||||||
|
AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))}))
|
||||||
|
__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(),
|
||||||
|
{})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(),
|
||||||
|
ArrayRef<AttributeSet>({AttributeSet(),
|
||||||
|
AttributeSet(EnumAttr(NoCapture),
|
||||||
|
EnumAttr(WriteOnly))}))
|
||||||
|
__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {})
|
||||||
|
// omp_get_partition_place_nums stores its result through its pointer argument,
// so it must not be marked ReadOnly. Use the same attribute set as the other
// out-pointer getters (omp_get_schedule, omp_get_place_proc_ids).
__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterArgWriteAttrs,
                AttributeSet(),
                ArrayRef<AttributeSet>(
                    {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))}))
|
||||||
|
|
||||||
|
__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {})
|
||||||
|
__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {})
|
||||||
|
|
||||||
#undef __OMP_RTL_ATTRS
|
#undef __OMP_RTL_ATTRS
|
||||||
#undef OMP_RTL_ATTRS
|
#undef OMP_RTL_ATTRS
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "llvm/Analysis/CallGraph.h"
|
#include "llvm/Analysis/CallGraph.h"
|
||||||
#include "llvm/Analysis/CallGraphSCCPass.h"
|
#include "llvm/Analysis/CallGraphSCCPass.h"
|
||||||
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
||||||
|
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
|
||||||
#include "llvm/IR/CallSite.h"
|
#include "llvm/IR/CallSite.h"
|
||||||
#include "llvm/InitializePasses.h"
|
#include "llvm/InitializePasses.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
|
@ -52,9 +53,10 @@ struct OpenMPOpt {
|
||||||
SmallPtrSetImpl<Function *> &ModuleSlice,
|
SmallPtrSetImpl<Function *> &ModuleSlice,
|
||||||
CallGraphUpdater &CGUpdater)
|
CallGraphUpdater &CGUpdater)
|
||||||
: M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
|
: M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
|
||||||
CGUpdater(CGUpdater) {
|
OMPBuilder(M), CGUpdater(CGUpdater) {
|
||||||
initializeTypes(M);
|
initializeTypes(M);
|
||||||
initializeRuntimeFunctions();
|
initializeRuntimeFunctions();
|
||||||
|
OMPBuilder.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generic information that describes a runtime function
|
/// Generic information that describes a runtime function
|
||||||
|
@ -118,12 +120,36 @@ private:
|
||||||
bool deduplicateRuntimeCalls() {
|
bool deduplicateRuntimeCalls() {
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
|
|
||||||
|
RuntimeFunction DeduplicableRuntimeCallIDs[] = {
|
||||||
|
OMPRTL_omp_get_num_threads,
|
||||||
|
OMPRTL_omp_in_parallel,
|
||||||
|
OMPRTL_omp_get_cancellation,
|
||||||
|
OMPRTL_omp_get_thread_limit,
|
||||||
|
OMPRTL_omp_get_supported_active_levels,
|
||||||
|
OMPRTL_omp_get_level,
|
||||||
|
OMPRTL_omp_get_ancestor_thread_num,
|
||||||
|
OMPRTL_omp_get_team_size,
|
||||||
|
OMPRTL_omp_get_active_level,
|
||||||
|
OMPRTL_omp_in_final,
|
||||||
|
OMPRTL_omp_get_proc_bind,
|
||||||
|
OMPRTL_omp_get_num_places,
|
||||||
|
OMPRTL_omp_get_num_procs,
|
||||||
|
OMPRTL_omp_get_place_num,
|
||||||
|
OMPRTL_omp_get_partition_num_places,
|
||||||
|
OMPRTL_omp_get_partition_place_nums};
|
||||||
|
|
||||||
|
// Global-tid is handled separately.
|
||||||
SmallSetVector<Value *, 16> GTIdArgs;
|
SmallSetVector<Value *, 16> GTIdArgs;
|
||||||
collectGlobalThreadIdArguments(GTIdArgs);
|
collectGlobalThreadIdArguments(GTIdArgs);
|
||||||
LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
|
LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
|
||||||
<< " global thread ID arguments\n");
|
<< " global thread ID arguments\n");
|
||||||
|
|
||||||
for (Function *F : SCC) {
|
for (Function *F : SCC) {
|
||||||
|
// Deduplicate every runtime call in the list within this function and
// accumulate the result, so that a modification made here is reflected in
// the value this function returns.
for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
  Changed |= deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);
|
||||||
|
|
||||||
|
// __kmpc_global_thread_num is special as we can replace it with an
|
||||||
|
// argument in enough cases to make it worth trying.
|
||||||
Value *GTIdArg = nullptr;
|
Value *GTIdArg = nullptr;
|
||||||
for (Argument &Arg : F->args())
|
for (Argument &Arg : F->args())
|
||||||
if (GTIdArgs.count(&Arg)) {
|
if (GTIdArgs.count(&Arg)) {
|
||||||
|
@ -132,7 +158,6 @@ private:
|
||||||
}
|
}
|
||||||
Changed |= deduplicateRuntimeCalls(
|
Changed |= deduplicateRuntimeCalls(
|
||||||
*F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
|
*F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
|
||||||
Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Changed;
|
return Changed;
|
||||||
|
@ -259,6 +284,7 @@ private:
|
||||||
unsigned NumUses = 0;
|
unsigned NumUses = 0;
|
||||||
if (!RFI.Declaration)
|
if (!RFI.Declaration)
|
||||||
return NumUses;
|
return NumUses;
|
||||||
|
OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
|
||||||
|
|
||||||
NumOpenMPRuntimeFunctionsIdentified += 1;
|
NumOpenMPRuntimeFunctionsIdentified += 1;
|
||||||
NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
|
NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
|
||||||
|
@ -312,6 +338,9 @@ private:
|
||||||
/// The slice of the module we are allowed to look at.
|
/// The slice of the module we are allowed to look at.
|
||||||
SmallPtrSetImpl<Function *> &ModuleSlice;
|
SmallPtrSetImpl<Function *> &ModuleSlice;
|
||||||
|
|
||||||
|
/// An OpenMP-IR-Builder instance
|
||||||
|
OpenMPIRBuilder OMPBuilder;
|
||||||
|
|
||||||
/// Callback to update the call graph, the first argument is a removed call,
|
/// Callback to update the call graph, the first argument is a removed call,
|
||||||
/// the second an optional replacement call.
|
/// the second an optional replacement call.
|
||||||
CallGraphUpdater &CGUpdater;
|
CallGraphUpdater &CGUpdater;
|
||||||
|
|
Loading…
Reference in New Issue