Merge pull request #4317 from sfc-gh-anoyes/anoyes/fix-buggify-memory-error

Fix memory errors
This commit is contained in:
Trevor Clinkenbeard 2021-02-11 11:36:54 -08:00 committed by GitHub
commit ad0b59ac73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 17 additions and 10 deletions

View File

@ -4,6 +4,10 @@
Release Notes
#############
6.2.31
======
* Fix a rare invalid memory access on the data distributor when snapshotting large clusters. This is a follow-up to `PR #4076 <https://github.com/apple/foundationdb/pull/4076>`_. `(PR #4317) <https://github.com/apple/foundationdb/pull/4317>`_
6.2.30
======
* A storage server which has fallen behind will deprioritize reads in order to catch up. This change causes some saturating workloads to experience high read latencies instead of high GRV latencies. `(PR #4218) <https://github.com/apple/foundationdb/pull/4218>`_

View File

@ -4625,7 +4625,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
}
catch( Error &e ) {
state Error err = e;
trackerCancelled = true;
wait(shards.clearAsync());
if (err.code() != error_code_movekeys_conflict) throw err;
bool ddEnabled = wait( isDataDistributionEnabled(cx) );

View File

@ -212,7 +212,7 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled);
bool* trackerCancelled);
ACTOR Future<Void> dataDistributionQueue(
Database cx, PromiseStream<RelocateShard> output, FutureStream<RelocateShard> input,

View File

@ -79,7 +79,7 @@ struct DataDistributionTracker {
// The reference to trackerCancelled must be extracted by actors,
// because by the time (trackerCancelled == true) the memory of this
// DataDistributionTracker can no longer be accessed.
bool const& trackerCancelled;
bool& trackerCancelled;
// This class extracts the trackerCancelled reference from a DataDistributionTracker object
// Because some actors spawned by the dataDistributionTracker outlive the DataDistributionTracker
@ -108,7 +108,7 @@ struct DataDistributionTracker {
PromiseStream<RelocateShard> const& output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards,
bool const& trackerCancelled)
bool& trackerCancelled)
: cx(cx), distributorId(distributorId), dbSizeEstimate(new AsyncVar<int64_t>()), systemSizeEstimate(0),
maxShardSize(new AsyncVar<Optional<int64_t>>()), sizeChanges(false), readyToStart(readyToStart), output(output),
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams),
@ -116,6 +116,7 @@ struct DataDistributionTracker {
// Destructor: flags the tracker as cancelled through the shared trackerCancelled
// reference, then clears sizeChanges.
~DataDistributionTracker()
{
// Set the flag before tearing anything down. NOTE(review): presumably this lets
// actors that outlive this object check the flag instead of touching the
// tracker's memory after destruction -- confirm against the actors that read it.
trackerCancelled = true;
//Cancel all actors so they aren't waiting on sizeChanged broken promise
sizeChanges.clear(false);
}
@ -766,7 +767,7 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> anyZeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled) {
bool* trackerCancelled) {
state DataDistributionTracker self(cx, distributorId, readyToStart, output, shardsAffectedByTeamFailure,
anyZeroHealthyTeams, *shards, *trackerCancelled);
state Future<Void> loggingTrigger = Void();

View File

@ -1461,6 +1461,9 @@ int main(int argc, char* argv[]) {
delete FLOW_KNOBS;
delete SERVER_KNOBS;
delete CLIENT_KNOBS;
FLOW_KNOBS = nullptr;
SERVER_KNOBS = nullptr;
CLIENT_KNOBS = nullptr;
FlowKnobs* flowKnobs = new FlowKnobs(true, role == Simulation);
ClientKnobs* clientKnobs = new ClientKnobs(true);
ServerKnobs* serverKnobs = new ServerKnobs(true, clientKnobs, role == Simulation);

View File

@ -805,7 +805,7 @@ bool TraceEvent::init() {
detail("Severity", int(severity));
detail("Time", "0.000000");
timeIndex = fields.size() - 1;
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
if (FLOW_KNOBS && FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
detail("DateTime", "");
}
@ -1028,7 +1028,7 @@ void TraceEvent::log() {
if (enabled) {
double time = TraceEvent::getCurrentTime();
fields.mutate(timeIndex).second = format("%.6f", time);
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
if (FLOW_KNOBS && FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.mutate(timeIndex + 1).second = TraceEvent::printRealTime(time);
}
@ -1193,7 +1193,7 @@ void TraceBatch::dump() {
TraceBatch::EventInfo::EventInfo(double time, const char *name, uint64_t id, const char *location) {
fields.addField("Severity", format("%d", (int)SevInfo));
fields.addField("Time", format("%.6f", time));
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
if (FLOW_KNOBS && FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.addField("DateTime", TraceEvent::printRealTime(time));
}
fields.addField("Type", name);
@ -1204,7 +1204,7 @@ TraceBatch::EventInfo::EventInfo(double time, const char *name, uint64_t id, con
TraceBatch::AttachInfo::AttachInfo(double time, const char *name, uint64_t id, uint64_t to) {
fields.addField("Severity", format("%d", (int)SevInfo));
fields.addField("Time", format("%.6f", time));
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
if (FLOW_KNOBS && FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.addField("DateTime", TraceEvent::printRealTime(time));
}
fields.addField("Type", name);
@ -1215,7 +1215,7 @@ TraceBatch::AttachInfo::AttachInfo(double time, const char *name, uint64_t id, u
TraceBatch::BuggifyInfo::BuggifyInfo(double time, int activated, int line, std::string file) {
fields.addField("Severity", format("%d", (int)SevInfo));
fields.addField("Time", format("%.6f", time));
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
if (FLOW_KNOBS && FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.addField("DateTime", TraceEvent::printRealTime(time));
}
fields.addField("Type", "BuggifySection");