// Patch overview (review notes; this text sits before the first "diff --git" header).
//
// fdbclient/DatabaseBackupAgent.actor.cpp
//   - Adds a `retries` counter. The catch block increments it and, once it exceeds 5, appends
//     "WARNING: Could not fetch full DR status: <error name>" to statusText and returns it
//     instead of waiting on tr->onError(e) again.
//   - The reads for the error range, folder id, apply-mutations begin key, tag name, stop version
//     and backup ranges are now created as futures (fErrorValues, fBackupUid, fBackupVerison,
//     fTagName, fStopVersionKey, fBackupKeysPacked) before the wait on getStateValue(), and are
//     awaited later at the points where the old code issued and waited on each read inline.
//   - fErrorValues is a real getRange only when errorLimit > 0; otherwise it is a
//     default-constructed Future, and it is awaited only inside `if (errorLimit > 0)`.
//   - The `uid` read (config ... keyFolderId) is removed without a replacement.
//     NOTE(review): presumably it was unused - confirm against the full function.
//   - NOTE(review): `fBackupVerison` looks like a misspelling of `fBackupVersion`; both
//     occurrences are lines added by this patch.
//   - NOTE(review): whether statusText is reset at the top of each loop iteration is not visible
//     here; if it is not, the text returned after retries are exhausted could include output
//     from earlier failed attempts - confirm.
//
// fdbclient/FileBackupAgent.actor.cpp
//   - submitBackup additionally sets FDBTransactionOptions::COMMIT_ON_FIRST_PROXY on tr.
//
// fdbrpc/sim2.actor.cpp
//   - timer(): the term `time+1.0` becomes `time+0.1`, and the comment above it now says
//     timer() can be up to 0.1 seconds ahead of now().
//
// fdbserver/DataDistribution.actor.cpp, fdbserver/Knobs.cpp, fdbserver/Knobs.h
//   - Knob FREE_SPACE_RATIO_CUTOFF (init 0.35) is replaced by AVAILABLE_SPACE_RATIO_CUTOFF
//     (init 0.05); the FREE_SPACE_CUTOFF_PENALTY declaration is removed from Knobs.h.
//   - NOTE(review): the unchanged context comment still refers to "a member at 20% free space";
//     with the cutoff now 0.05 that figure may be stale - confirm.
//   - Adds knob MAX_PROXY_COMPUTE (init 2.0).
//
// fdbserver/MasterProxyServer.actor.cpp
//   - commitBatch: the releaseDelay duration becomes
//     std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE, batchOperations*commitComputePerOperation[latencyBucket]).
//
// NOTE(review): template argument lists look stripped (e.g. `Future> fPaused`, `Reference tr`)
// and the line breaks inside hunks are collapsed - presumably an extraction artifact; the patch
// is unlikely to apply in this form - confirm against the original commit.
diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp b/fdbclient/DatabaseBackupAgent.actor.cpp index e1e01672bc..8d2fe7ea4e 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -2277,6 +2277,7 @@ public: state Reference tr(new ReadYourWritesTransaction(cx)); tr->setOption(FDBTransactionOptions::LOCK_AWARE); state std::string statusText; + state int retries = 0; loop{ try { @@ -2294,27 +2295,33 @@ public: tr->setOption(FDBTransactionOptions::LOCK_AWARE); state Future> fPaused = tr->get(backupAgent->taskBucket->getPauseKey()); + state Future> fErrorValues = errorLimit > 0 ? tr->getRange(backupAgent->errors.get(BinaryWriter::toValue(logUid, Unversioned())).range(), errorLimit, false, true) : Future>(); + state Future> fBackupUid = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyFolderId)); + state Future> fBackupVerison = tr->get(BinaryWriter::toValue(logUid, Unversioned()).withPrefix(applyMutationsBeginRange.begin)); + state Future> fTagName = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupTag)); + state Future> fStopVersionKey = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyStateStop)); + state Future> fBackupKeysPacked = tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupRanges)); + int backupStateInt = wait(backupAgent->getStateValue(tr, logUid)); state BackupAgentBase::enumState backupState = (BackupAgentBase::enumState)backupStateInt; - + if (backupState == DatabaseBackupAgent::STATE_NEVERRAN) { statusText += "No previous backups found.\n"; } else { state std::string tagNameDisplay; - Optional tagName = wait(tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupTag))); + Optional tagName = wait(fTagName); // 
Define the display tag name if (tagName.present()) { tagNameDisplay = tagName.get().toString(); } - state Optional uid = wait(tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyFolderId))); - state Optional stopVersionKey = wait(tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyStateStop))); + state Optional stopVersionKey = wait(fStopVersionKey); + + Optional backupKeysPacked = wait(fBackupKeysPacked); state Standalone> backupRanges; - Optional backupKeysPacked = wait(tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupRanges))); - if (backupKeysPacked.present()) { BinaryReader br(backupKeysPacked.get(), IncludeVersion()); br >> backupRanges; @@ -2350,7 +2357,7 @@ public: // Append the errors, if requested if (errorLimit > 0) { - Standalone values = wait(tr->getRange(backupAgent->errors.get(BinaryWriter::toValue(logUid, Unversioned())).range(), errorLimit, false, true)); + Standalone values = wait( fErrorValues ); // Display the errors, if any if (values.size() > 0) { @@ -2367,10 +2374,9 @@ public: //calculate time differential - state Optional backupUid = wait(tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyFolderId))); + Optional backupUid = wait(fBackupUid); if(backupUid.present()) { - Optional v = wait(tr->get(BinaryWriter::toValue(logUid, Unversioned()).withPrefix(applyMutationsBeginRange.begin))); - + Optional v = wait(fBackupVerison); if (v.present()) { state Version destApplyBegin = BinaryReader::fromStringRef(v.get(), Unversioned()); Version sourceVersion = wait(srcReadVersion); @@ -2387,6 +2393,11 @@ public: break; } catch (Error &e) { + retries++; + if(retries > 5) { + statusText += format("\nWARNING: Could not fetch full DR status: %s\n", e.name()); + return statusText; + } wait(tr->onError(e)); } } diff --git 
a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index fa21bc710e..d1282174b6 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3552,6 +3552,7 @@ public: ACTOR static Future submitBackup(FileBackupAgent* backupAgent, Reference tr, Key outContainer, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, bool stopWhenDone) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); + tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); TraceEvent(SevInfo, "FBA_SubmitBackup") .detail("TagName", tagName.c_str()) diff --git a/fdbrpc/sim2.actor.cpp b/fdbrpc/sim2.actor.cpp index f9e8b482a3..9d0e516899 100644 --- a/fdbrpc/sim2.actor.cpp +++ b/fdbrpc/sim2.actor.cpp @@ -754,9 +754,9 @@ public: // Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time virtual double now() { return time; } - // timer() can be up to one second ahead of now() + // timer() can be up to 0.1 seconds ahead of now() virtual double timer() { - timerTime += deterministicRandom()->random01()*(time+1.0-timerTime)/2.0; + timerTime += deterministicRandom()->random01()*(time+0.1-timerTime)/2.0; return timerTime; } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 9ed9f3d159..3d1e8ecdfc 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -240,7 +240,7 @@ public: int64_t physicalBytes = getLoadAverage(); double minAvailableSpaceRatio = getMinAvailableSpaceRatio(includeInFlight); int64_t inFlightBytes = includeInFlight ? 
getDataInFlightToTeam() / servers.size() : 0; - double availableSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minAvailableSpaceRatio ), 0.000001 ) ); + double availableSpaceMultiplier = SERVER_KNOBS->AVAILABLE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->AVAILABLE_SPACE_RATIO_CUTOFF, minAvailableSpaceRatio ), 0.000001 ) ); if(servers.size()>2) { //make sure in triple replication the penalty is high enough that you will always avoid a team with a member at 20% free space availableSpaceMultiplier = availableSpaceMultiplier * availableSpaceMultiplier; diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 992f84625c..2ce0aac021 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -184,7 +184,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula init( DD_MERGE_COALESCE_DELAY, isSimulated ? 30.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001; init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0; init( STORAGE_METRICS_RANDOM_DELAY, 0.2 ); - init( FREE_SPACE_RATIO_CUTOFF, 0.35 ); + init( AVAILABLE_SPACE_RATIO_CUTOFF, 0.05 ); init( DESIRED_TEAMS_PER_SERVER, 5 ); if( randomize && BUGGIFY ) DESIRED_TEAMS_PER_SERVER = 1; init( MAX_TEAMS_PER_SERVER, 5*DESIRED_TEAMS_PER_SERVER ); init( DD_SHARD_SIZE_GRANULARITY, 5000000 ); @@ -318,6 +318,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula init( ALWAYS_CAUSAL_READ_RISKY, false ); init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1; init( MIN_PROXY_COMPUTE, 0.001 ); + init( MAX_PROXY_COMPUTE, 2.0 ); init( PROXY_COMPUTE_BUCKETS, 20000 ); init( PROXY_COMPUTE_GROWTH_RATE, 0.01 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 42aee44ef5..c5c41fc58f 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -149,8 +149,7 @@ public: double 
DD_MERGE_COALESCE_DELAY; double STORAGE_METRICS_POLLING_DELAY; double STORAGE_METRICS_RANDOM_DELAY; - double FREE_SPACE_RATIO_CUTOFF; - double FREE_SPACE_CUTOFF_PENALTY; + double AVAILABLE_SPACE_RATIO_CUTOFF; int DESIRED_TEAMS_PER_SERVER; int MAX_TEAMS_PER_SERVER; int64_t DD_SHARD_SIZE_GRANULARITY; @@ -264,6 +263,7 @@ public: bool ALWAYS_CAUSAL_READ_RISKY; int MAX_COMMIT_UPDATES; double MIN_PROXY_COMPUTE; + double MAX_PROXY_COMPUTE; int PROXY_COMPUTE_BUCKETS; double PROXY_COMPUTE_GROWTH_RATE; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/MasterProxyServer.actor.cpp index 3ef38de80a..7859b4a4e5 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/MasterProxyServer.actor.cpp @@ -534,7 +534,7 @@ ACTOR Future commitBatch( /////// Phase 1: Pre-resolution processing (CPU bound except waiting for a version # which is separately pipelined and *should* be available by now (unless empty commit); ordered; currently atomic but could yield) TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber-1); // Queuing pre-resolution commit processing wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1)); - state Future releaseDelay = delay(batchOperations*self->commitComputePerOperation[latencyBucket], TaskPriority::ProxyMasterVersionReply); + state Future releaseDelay = delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE, batchOperations*self->commitComputePerOperation[latencyBucket]), TaskPriority::ProxyMasterVersionReply); if (debugID.present()) g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GettingCommitVersion");