Merge pull request #6896 from sfc-gh-mpilman/features/validate-trace-events-in-simulation
Reduce excessive tracing and fail after 1M traces
This commit is contained in:
commit
fe338ced8f
|
@ -265,11 +265,11 @@ void DatabaseContext::getLatestCommitVersions(const Reference<LocationInfo>& loc
|
|||
void updateCachedReadVersionShared(double t, Version v, DatabaseSharedState* p) {
|
||||
MutexHolder mutex(p->mutexLock);
|
||||
if (v >= p->grvCacheSpace.cachedReadVersion) {
|
||||
TraceEvent(SevDebug, "CacheReadVersionUpdate")
|
||||
.detail("Version", v)
|
||||
.detail("CurTime", t)
|
||||
.detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
|
||||
.detail("LastTime", p->grvCacheSpace.lastGrvTime);
|
||||
//TraceEvent(SevDebug, "CacheReadVersionUpdate")
|
||||
// .detail("Version", v)
|
||||
// .detail("CurTime", t)
|
||||
// .detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
|
||||
// .detail("LastTime", p->grvCacheSpace.lastGrvTime);
|
||||
p->grvCacheSpace.cachedReadVersion = v;
|
||||
if (t > p->grvCacheSpace.lastGrvTime) {
|
||||
p->grvCacheSpace.lastGrvTime = t;
|
||||
|
@ -282,11 +282,11 @@ void DatabaseContext::updateCachedReadVersion(double t, Version v) {
|
|||
return updateCachedReadVersionShared(t, v, sharedStatePtr);
|
||||
}
|
||||
if (v >= cachedReadVersion) {
|
||||
TraceEvent(SevDebug, "CachedReadVersionUpdate")
|
||||
.detail("Version", v)
|
||||
.detail("GrvStartTime", t)
|
||||
.detail("LastVersion", cachedReadVersion)
|
||||
.detail("LastTime", lastGrvTime);
|
||||
//TraceEvent(SevDebug, "CachedReadVersionUpdate")
|
||||
// .detail("Version", v)
|
||||
// .detail("GrvStartTime", t)
|
||||
// .detail("LastVersion", cachedReadVersion)
|
||||
// .detail("LastTime", lastGrvTime);
|
||||
cachedReadVersion = v;
|
||||
// Since the time is based on the start of the request, it's possible that we
|
||||
// get a newer version with an older time.
|
||||
|
|
|
@ -106,6 +106,7 @@ struct CycleWorkload : TestWorkload {
|
|||
state Transaction tr(cx);
|
||||
if (deterministicRandom()->random01() >= self->traceParentProbability) {
|
||||
state Span span("CycleClient"_loc);
|
||||
// TraceEvent("CycleTracingTransaction", span.context).log();
|
||||
TraceEvent("CycleTracingTransaction", span.context).log();
|
||||
tr.setOption(FDBTransactionOptions::SPAN_PARENT,
|
||||
BinaryWriter::toValue(span.context, Unversioned()));
|
||||
|
|
|
@ -217,6 +217,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) {
|
|||
init( MAX_TRACE_EVENT_LENGTH, 4000 ); // If the value of this is changed, the corresponding default in Trace.cpp should be changed as well
|
||||
init( ALLOCATION_TRACING_ENABLED, true );
|
||||
init( SIM_SPEEDUP_AFTER_SECONDS, 450 );
|
||||
init( MAX_TRACE_LINES, 1'000'000 );
|
||||
init( CODE_COV_TRACE_EVENT_SEVERITY, 10 ); // Code coverage TraceEvent severity level
|
||||
|
||||
//TDMetrics
|
||||
|
|
|
@ -271,6 +271,7 @@ public:
|
|||
double MAX_BUGGIFIED_DELAY;
|
||||
int SIM_CONNECT_ERROR_MODE;
|
||||
double SIM_SPEEDUP_AFTER_SECONDS;
|
||||
int MAX_TRACE_LINES;
|
||||
|
||||
// Tracefiles
|
||||
int ZERO_LENGTH_FILE_PAD;
|
||||
|
|
|
@ -53,6 +53,8 @@
|
|||
// 2. To avoid a historically documented but unknown crash that occurs when logging allocations
|
||||
// during an open trace event
|
||||
thread_local int g_allocation_tracing_disabled = 1;
|
||||
unsigned tracedLines = 0;
|
||||
thread_local int failedLineOverflow = 0;
|
||||
|
||||
ITraceLogIssuesReporter::~ITraceLogIssuesReporter() {}
|
||||
|
||||
|
@ -394,17 +396,22 @@ public:
|
|||
eventBuffer.push_back(fields);
|
||||
bufferLength += fields.sizeBytes();
|
||||
|
||||
// If we have queued up a large number of events in simulation, then throw an error. This makes it easier to
|
||||
// diagnose cases where we get stuck in a loop logging trace events that eventually runs out of memory.
|
||||
// Without this we would never see any trace events from that loop, and it would be more difficult to identify
|
||||
// where the process is actually stuck.
|
||||
if (g_network && g_network->isSimulated() && bufferLength > 1e8) {
|
||||
fprintf(stderr, "Trace log buffer overflow\n");
|
||||
fprintf(stderr, "Last event: %s\n", fields.toString().c_str());
|
||||
// Setting this to 0 avoids a recurse from the assertion trace event and also prevents a situation where
|
||||
// we roll the trace log only to log the single assertion event when using --crash.
|
||||
bufferLength = 0;
|
||||
ASSERT(false);
|
||||
if (g_network && g_network->isSimulated()) {
|
||||
// Throw an error if we have queued up a large number of events in simulation. This makes it easier to
|
||||
// diagnose cases where we get stuck in a loop logging trace events that eventually runs out of memory.
|
||||
// Without this we would never see any trace events from that loop, and it would be more difficult to
|
||||
// identify where the process is actually stuck.
|
||||
if (bufferLength > 1e8) {
|
||||
fprintf(stderr, "Trace log buffer overflow\n");
|
||||
fprintf(stderr, "Last event: %s\n", fields.toString().c_str());
|
||||
// Setting this to 0 avoids a recurse from the assertion trace event and also prevents a situation where
|
||||
// we roll the trace log only to log the single assertion event when using --crash.
|
||||
bufferLength = 0;
|
||||
ASSERT(false);
|
||||
}
|
||||
if (++tracedLines > FLOW_KNOBS->MAX_TRACE_LINES && failedLineOverflow == 0) {
|
||||
failedLineOverflow = 1; // we only want to do this once
|
||||
}
|
||||
}
|
||||
|
||||
if (trackError) {
|
||||
|
@ -1265,6 +1272,11 @@ void BaseTraceEvent::log() {
|
|||
|
||||
BaseTraceEvent::~BaseTraceEvent() {
|
||||
log();
|
||||
if (failedLineOverflow == 1) {
|
||||
failedLineOverflow = 2;
|
||||
TraceEvent(SevError, "TracedTooManyLines").log();
|
||||
crashAndDie();
|
||||
}
|
||||
}
|
||||
|
||||
thread_local bool BaseTraceEvent::networkThread = false;
|
||||
|
|
Loading…
Reference in New Issue