llvm-project/llvm/lib/Support/TimeProfiler.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

331 lines
12 KiB
C++
Raw Normal View History

//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hierarchical time profiler.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/TimeProfiler.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/JSON.h"
[llvm] Cleanup header dependencies in ADT and Support The cleanup was manual, but assisted by "include-what-you-use". It consists in 1. Removing unused forward declaration. No impact expected. 2. Removing unused headers in .cpp files. No impact expected. 3. Removing unused headers in .h files. This removes implicit dependencies and is generally considered a good thing, but this may break downstream builds. I've updated llvm, clang, lld, lldb and mlir deps, and included a list of the modification in the second part of the commit. 4. Replacing header inclusion by forward declaration. This has the same impact as 3. Notable changes: - llvm/Support/TargetParser.h no longer includes llvm/Support/AArch64TargetParser.h nor llvm/Support/ARMTargetParser.h - llvm/Support/TypeSize.h no longer includes llvm/Support/WithColor.h - llvm/Support/YAMLTraits.h no longer includes llvm/Support/Regex.h - llvm/ADT/SmallVector.h no longer includes llvm/Support/MemAlloc.h nor llvm/Support/ErrorHandling.h You may need to add some of these headers in your compilation units, if needs be. As an hint to the impact of the cleanup, running clang++ -E -Iinclude -I../llvm/include ../llvm/lib/Support/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l before: 8000919 lines after: 7917500 lines Reduced dependencies also helps incremental rebuilds and is more ccache friendly, something not shown by the above metric :-) Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831
2022-01-20 19:55:14 +08:00
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <mutex>
#include <string>
#include <vector>
using namespace std::chrono;
using namespace llvm;
// Serializes access to ThreadTimeTraceProfilerInstances; it is taken by
// write(), timeTraceProfilerCleanup() and timeTraceProfilerFinishThread().
static std::mutex Mu;
// List of all profiler instances that threads handed over through
// timeTraceProfilerFinishThread(). The pointers stay in this list until
// timeTraceProfilerCleanup() deletes them.
static ManagedStatic<std::vector<TimeTraceProfiler *>>
ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
// Per-thread instance: the profiler of the current thread, or nullptr when
// none has been initialized (or it was already finished/cleaned up).
static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
// Returns the calling thread's profiler instance. The result is nullptr when
// the thread-local pointer has not been set by timeTraceProfilerInitialize()
// or has been reset by the cleanup/finish functions.
TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
return TimeTraceProfilerInstance;
}
// Native duration and time point of the steady clock; every scope measurement
// in this file is expressed in these types.
using DurationType = std::chrono::steady_clock::duration;
using TimePointType = std::chrono::steady_clock::time_point;
// (number of occurrences, accumulated duration) for one section name.
using CountAndDurationType = std::pair<size_t, DurationType>;
// A section name together with its occurrence count and accumulated duration.
using NameAndCountAndDurationType =
    std::pair<std::string, CountAndDurationType>;

namespace {
/// A single profiled scope: when it started and ended, its name and an
/// optional detail string. Only End can be modified after construction.
struct Entry {
  const TimePointType Start;
  TimePointType End;
  const std::string Name;
  const std::string Detail;

  Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
      : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
        Detail(std::move(Dt)) {}

  // Timings for the flame graph. Both helpers truncate the *time points* to
  // microsecond precision and subtract afterwards, instead of truncating the
  // resulting duration. This avoids truncation issues that would make inner
  // scopes appear to overrun their outer scopes.

  /// Microseconds between \p StartTime and the start of this scope.
  std::chrono::steady_clock::rep
  getFlameGraphStartUs(TimePointType StartTime) const {
    const auto ScopeStartUs =
        std::chrono::time_point_cast<std::chrono::microseconds>(Start);
    const auto OriginUs =
        std::chrono::time_point_cast<std::chrono::microseconds>(StartTime);
    return (ScopeStartUs - OriginUs).count();
  }

  /// Length of this scope in microseconds.
  std::chrono::steady_clock::rep getFlameGraphDurUs() const {
    const auto ScopeEndUs =
        std::chrono::time_point_cast<std::chrono::microseconds>(End);
    const auto ScopeStartUs =
        std::chrono::time_point_cast<std::chrono::microseconds>(Start);
    return (ScopeEndUs - ScopeStartUs).count();
  }
};
} // namespace
/// Hierarchical time profiler for one thread.
///
/// Scopes are opened with begin() and closed with end(); open scopes live on
/// Stack, finished ones that meet the granularity threshold are kept in
/// Entries, and per-name totals are accumulated in CountAndTotalPerName.
/// write() serializes this profiler together with every profiler stored in
/// ThreadTimeTraceProfilerInstances as one JSON document.
struct llvm::TimeTraceProfiler {
  /// Records the wall-clock and steady-clock time of construction, the process
  /// id, and the thread id / thread name reported by llvm::get_threadid() and
  /// llvm::get_thread_name().
  ///
  /// \param TimeTraceGranularity minimum duration, in microseconds, a scope
  ///        must last to be stored as an individual event.
  /// \param ProcName name emitted in the "process_name" metadata event.
  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
      : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
        ProcName(ProcName), Pid(sys::Process::getProcessId()),
        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
    llvm::get_thread_name(ThreadName);
  }

  /// Opens a new scope called \p Name, starting now. \p Detail is invoked
  /// once, immediately, to produce the scope's detail string.
  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
    Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
                       Detail());
  }

  /// Closes the innermost open scope, records it if it is long enough and
  /// updates the per-name totals. begin() must have been called first.
  void end() {
    assert(!Stack.empty() && "Must call begin() first");
    Entry &E = Stack.back();
    E.End = steady_clock::now();

    // Check that end times monotonically increase.
    assert((Entries.empty() ||
            (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
             Entries.back().getFlameGraphStartUs(StartTime) +
                 Entries.back().getFlameGraphDurUs())) &&
           "TimeProfiler scope ended earlier than previous scope");

    // Calculate duration at full precision for overall counts.
    DurationType Duration = E.End - E.Start;

    // Only include sections longer than or equal to TimeTraceGranularity
    // microseconds.
    if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
      Entries.emplace_back(E);

    // Track total time taken by each "name", but only the topmost levels of
    // them; e.g. if there's a template instantiation that instantiates other
    // templates from within, we only want to add the topmost one. "topmost"
    // happens to be the ones that don't have any currently open entries above
    // itself.
    if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
                      [&](const Entry &Val) { return Val.Name == E.Name; })) {
      auto &CountAndTotal = CountAndTotalPerName[E.Name];
      CountAndTotal.first++;
      CountAndTotal.second += Duration;
    }

    // E refers into Stack, so it must not be used past this point.
    Stack.pop_back();
  }

  /// Writes the events of this profiler and of every profiler in
  /// ThreadTimeTraceProfilerInstances to \p OS as a single JSON object with a
  /// "traceEvents" array and a "beginningOfTime" attribute.
  ///
  /// All scopes on all involved profilers must already be closed. Mu is held
  /// for the whole call.
  void write(raw_pwrite_stream &OS) {
    // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
    std::lock_guard<std::mutex> Lock(Mu);
    assert(Stack.empty() &&
           "All profiler sections should be ended when calling write");
    assert(llvm::all_of(*ThreadTimeTraceProfilerInstances,
                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
           "All profiler sections should be ended when calling write");

    json::OStream J(OS);
    J.objectBegin();
    J.attributeBegin("traceEvents");
    J.arrayBegin();

    // Emit all events for the main flame graph. Start times of every thread
    // are expressed relative to this profiler's StartTime.
    auto writeEvent = [&](const auto &E, uint64_t Tid) {
      auto StartUs = E.getFlameGraphStartUs(StartTime);
      auto DurUs = E.getFlameGraphDurUs();

      J.object([&] {
        J.attribute("pid", Pid);
        J.attribute("tid", int64_t(Tid));
        J.attribute("ph", "X");
        J.attribute("ts", StartUs);
        J.attribute("dur", DurUs);
        J.attribute("name", E.Name);
        if (!E.Detail.empty()) {
          J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
        }
      });
    };
    for (const Entry &E : Entries)
      writeEvent(E, this->Tid);
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      for (const Entry &E : TTP->Entries)
        writeEvent(E, TTP->Tid);

    // Emit totals by section name as additional "thread" events, sorted from
    // longest one.
    // Find highest used thread id.
    uint64_t MaxTid = this->Tid;
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      MaxTid = std::max(MaxTid, TTP->Tid);

    // Combine all CountAndTotalPerName from threads into one.
    StringMap<CountAndDurationType> AllCountAndTotalPerName;
    auto combineStat = [&](const auto &Stat) {
      StringRef Key = Stat.getKey();
      auto Value = Stat.getValue();
      auto &CountAndTotal = AllCountAndTotalPerName[Key];
      CountAndTotal.first += Value.first;
      CountAndTotal.second += Value.second;
    };
    for (const auto &Stat : CountAndTotalPerName)
      combineStat(Stat);
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      for (const auto &Stat : TTP->CountAndTotalPerName)
        combineStat(Stat);

    std::vector<NameAndCountAndDurationType> SortedTotals;
    SortedTotals.reserve(AllCountAndTotalPerName.size());
    for (const auto &Total : AllCountAndTotalPerName)
      SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());

    llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
                                const NameAndCountAndDurationType &B) {
      return A.second.second > B.second.second;
    });

    // Report totals on separate threads of tracing file: each total gets its
    // own tid, starting right after the highest real thread id.
    uint64_t TotalTid = MaxTid + 1;
    for (const NameAndCountAndDurationType &Total : SortedTotals) {
      auto DurUs = duration_cast<microseconds>(Total.second.second).count();
      // The count was copied into the sorted entry together with the
      // duration, so read it from there instead of looking the name up in the
      // map a second time.
      size_t Count = Total.second.first;

      J.object([&] {
        J.attribute("pid", Pid);
        J.attribute("tid", int64_t(TotalTid));
        J.attribute("ph", "X");
        J.attribute("ts", 0);
        J.attribute("dur", DurUs);
        J.attribute("name", "Total " + Total.first);
        J.attributeObject("args", [&] {
          J.attribute("count", int64_t(Count));
          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
        });
      });

      ++TotalTid;
    }

    // Metadata events naming the process and each thread.
    auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
                                  StringRef arg) {
      J.object([&] {
        J.attribute("cat", "");
        J.attribute("pid", Pid);
        J.attribute("tid", int64_t(Tid));
        J.attribute("ts", 0);
        J.attribute("ph", "M");
        J.attribute("name", Name);
        J.attributeObject("args", [&] { J.attribute("name", arg); });
      });
    };

    writeMetadataEvent("process_name", Tid, ProcName);
    writeMetadataEvent("thread_name", Tid, ThreadName);
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);

    J.arrayEnd();
    J.attributeEnd();

    // Emit the absolute time when this TimeProfiler started.
    // This can be used to combine the profiling data from
    // multiple processes and preserve actual time intervals.
    J.attribute("beginningOfTime",
                time_point_cast<microseconds>(BeginningOfTime)
                    .time_since_epoch()
                    .count());

    J.objectEnd();
  }

  // Scopes that have been opened by begin() but not yet closed by end().
  SmallVector<Entry, 16> Stack;
  // Finished scopes that met the granularity threshold.
  SmallVector<Entry, 128> Entries;
  // Occurrence count and accumulated duration per scope name.
  StringMap<CountAndDurationType> CountAndTotalPerName;
  // System-clock time at construction; written out as "beginningOfTime".
  const time_point<system_clock> BeginningOfTime;
  // Steady-clock time at construction; origin for event timestamps.
  const TimePointType StartTime;
  const std::string ProcName;
  const sys::Process::Pid Pid;
  SmallString<0> ThreadName;
  const uint64_t Tid;
  // Minimum time granularity (in microseconds)
  const unsigned TimeTraceGranularity;
};
void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
StringRef ProcName) {
assert(TimeTraceProfilerInstance == nullptr &&
"Profiler should not be initialized");
TimeTraceProfilerInstance = new TimeTraceProfiler(
TimeTraceGranularity, llvm::sys::path::filename(ProcName));
}
// Removes all TimeTraceProfilerInstances.
// Called from main thread.
void llvm::timeTraceProfilerCleanup() {
delete TimeTraceProfilerInstance;
TimeTraceProfilerInstance = nullptr;
std::lock_guard<std::mutex> Lock(Mu);
for (auto *TTP : *ThreadTimeTraceProfilerInstances)
delete TTP;
ThreadTimeTraceProfilerInstances->clear();
}
// Finish TimeTraceProfilerInstance on a worker thread.
// This doesn't remove the instance, just moves the pointer to global vector.
// Does nothing when the calling thread has no profiler.
void llvm::timeTraceProfilerFinishThread() {
  // A thread that never initialized a profiler has nothing to hand over.
  // Storing its nullptr in the global list would make write() dereference a
  // null pointer when it walks that list.
  if (TimeTraceProfilerInstance == nullptr)
    return;

  std::lock_guard<std::mutex> Lock(Mu);
  ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance);
  TimeTraceProfilerInstance = nullptr;
}
/// Serializes the calling thread's profiler (and the profilers of finished
/// threads) to \p OS. The calling thread must have a profiler.
void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
  assert(TimeTraceProfilerInstance != nullptr &&
         "Profiler object can't be null");
  TimeTraceProfiler &Profiler = *TimeTraceProfilerInstance;
  Profiler.write(OS);
}
Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
StringRef FallbackFileName) {
assert(TimeTraceProfilerInstance != nullptr &&
"Profiler object can't be null");
std::string Path = PreferredFileName.str();
if (Path.empty()) {
Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
Path += ".time-trace";
}
std::error_code EC;
[SystemZ][z/OS][Windows] Add new OF_TextWithCRLF flag and use this flag instead of OF_Text Problem: On SystemZ we need to open text files in text mode. On Windows, files opened in text mode adds a CRLF '\r\n' which may not be desirable. Solution: This patch adds two new flags - OF_CRLF which indicates that CRLF translation is used. - OF_TextWithCRLF = OF_Text | OF_CRLF indicates that the file is text and uses CRLF translation. Developers should now use either the OF_Text or OF_TextWithCRLF for text files and OF_None for binary files. If the developer doesn't want carriage returns on Windows, they should use OF_Text, if they do want carriage returns on Windows, they should use OF_TextWithCRLF. So this is the behaviour per platform with my patch: z/OS: OF_None: open in binary mode OF_Text : open in text mode OF_TextWithCRLF: open in text mode Windows: OF_None: open file with no carriage return OF_Text: open file with no carriage return OF_TextWithCRLF: open file with carriage return The Major change is in llvm/lib/Support/Windows/Path.inc to only set text mode if the OF_CRLF is set. ``` if (Flags & OF_CRLF) CrtOpenFlags |= _O_TEXT; ``` These following files are the ones that still use OF_Text which I left unchanged. I modified all these except raw_ostream.cpp in recent patches so I know these were previously in Binary mode on Windows. ./llvm/lib/Support/raw_ostream.cpp ./llvm/lib/TableGen/Main.cpp ./llvm/tools/dsymutil/DwarfLinkerForBinary.cpp ./llvm/unittests/Support/Path.cpp ./clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp ./clang/lib/Frontend/CompilerInstance.cpp ./clang/lib/Driver/Driver.cpp ./clang/lib/Driver/ToolChains/Clang.cpp Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D99426
2021-04-06 19:22:41 +08:00
raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
if (EC)
return createStringError(EC, "Could not open " + Path);
timeTraceProfilerWrite(OS);
return Error::success();
}
/// Opens a scope called \p Name with the fixed detail text \p Detail on the
/// calling thread's profiler. Does nothing when the thread has no profiler.
void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  // begin() takes the detail as a callable, so wrap the string in one.
  auto MakeDetail = [&]() { return Detail.str(); };
  TimeTraceProfilerInstance->begin(Name.str(), MakeDetail);
}
/// Opens a scope called \p Name on the calling thread's profiler; \p Detail
/// is forwarded to the profiler, which calls it to obtain the detail text.
/// Does nothing (and never calls \p Detail) when the thread has no profiler.
void llvm::timeTraceProfilerBegin(StringRef Name,
                                  llvm::function_ref<std::string()> Detail) {
  if (TimeTraceProfilerInstance == nullptr)
    return;
  TimeTraceProfilerInstance->begin(Name.str(), Detail);
}
/// Closes the innermost open scope of the calling thread's profiler. Does
/// nothing when the thread has no profiler.
void llvm::timeTraceProfilerEnd() {
  if (TimeTraceProfilerInstance == nullptr)
    return;
  TimeTraceProfilerInstance->end();
}