diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index 13e403a86e..d196f915c1 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -269,6 +269,7 @@ void ClientKnobs::initialize(Randomize randomize) { init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); // Limit in per sec init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days + init( CONSISTENCY_CHECK_URGENT_NEXT_WAIT_TIME, 600 ); init( CONSISTENCY_CHECK_URGENT_BATCH_SHARD_COUNT, 10 ); if( randomize && BUGGIFY ) CONSISTENCY_CHECK_URGENT_BATCH_SHARD_COUNT = 2; init( CONSISTENCY_CHECK_URGENT_RETRY_DEPTH_MAX, 10 ); if( randomize && BUGGIFY ) CONSISTENCY_CHECK_URGENT_RETRY_DEPTH_MAX = 1; init( CONSISTENCY_CHECK_URGENT_RANGE_BEGIN_0, "" ); if( randomize && BUGGIFY ) CONSISTENCY_CHECK_URGENT_RANGE_BEGIN_0 = ""; diff --git a/fdbclient/include/fdbclient/ClientKnobs.h b/fdbclient/include/fdbclient/ClientKnobs.h index 5240ffd647..44fe3ff6f2 100644 --- a/fdbclient/include/fdbclient/ClientKnobs.h +++ b/fdbclient/include/fdbclient/ClientKnobs.h @@ -257,6 +257,7 @@ public: int CONSISTENCY_CHECK_RATE_LIMIT_MAX; // Available in both normal and urgent mode int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME; // Available in normal mode + int CONSISTENCY_CHECK_URGENT_NEXT_WAIT_TIME; // Available in urgent mode int CONSISTENCY_CHECK_URGENT_BATCH_SHARD_COUNT; // Available in urgent mode int CONSISTENCY_CHECK_URGENT_RETRY_DEPTH_MAX; // Available in urgent mode std::string CONSISTENCY_CHECK_URGENT_RANGE_BEGIN_0; // Available in urgent mode diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 93117652db..3bee6e4114 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -1594,7 +1594,7 @@ ACTOR Future> getTesters(ReferenceonChange())) {} when(wait(testerTimeout)) { - TraceEvent(SevError, "TesterRecruitmentTimeout").log(); + TraceEvent(SevWarnAlways, "TesterRecruitmentTimeout"); throw timed_out(); } } @@ -1792,6 +1792,7 @@ ACTOR Future runConsistencyCheckerUrgentCore(Reference ts; // used to store testers interface state std::vector rangesToCheck; // get from globalProgressMap state std::vector shardsToCheck; // get from keyServer metadata + state Optional whenFailedToGetTesterStart; // Initialize globalProgressMap Optional> rangesToCheck_ = loadRangesToCheckFromKnob(); @@ -1838,7 +1839,19 @@ ACTOR Future runConsistencyCheckerUrgentCore(Reference 3600 * 24) { // 1 day + TraceEvent(SevError, "TesterRecruitmentTimeout"); + } + } + throw e; + } if (g_network->isSimulated() && deterministicRandom()->random01() < 0.05) { throw operation_failed(); // Introduce random failure } @@ -1908,9 +1921,24 @@ ACTOR Future runConsistencyCheckerUrgentCore(Reference runConsistencyCheckerUrgentHolder(Reference>> cc, + Database cx, + Optional> testers, + int minTestersExpected, + bool repeatRun) { + loop { + wait(runConsistencyCheckerUrgentCore(cc, cx, testers, minTestersExpected)); + if (!repeatRun) { + break; + } + wait(delay(CLIENT_KNOBS->CONSISTENCY_CHECK_URGENT_NEXT_WAIT_TIME)); + } + return Void(); +} + Future checkConsistencyUrgentSim(Database cx, std::vector testers) { - return runConsistencyCheckerUrgentCore( - Reference>>(), cx, testers, 1); + return runConsistencyCheckerUrgentHolder( + Reference>>(), cx, testers, 1, /*repeatRun=*/false); } ACTOR Future runTest(Database cx, @@ -3018,9 +3046,10 @@ ACTOR Future runTests(Reference connRecord, state Reference> dbInfo(new AsyncVar); state Future ccMonitor = monitorServerDBInfo(cc, LocalityData(), dbInfo); // FIXME: locality cx = openDBOnServer(dbInfo); - tests = reportErrors( - runConsistencyCheckerUrgentCore(cc, cx, Optional>(), minTestersExpected), - "runConsistencyCheckerUrgentCore"); + tests = + reportErrors(runConsistencyCheckerUrgentHolder( + cc, cx, Optional>(), minTestersExpected, /*repeatRun=*/true), + "runConsistencyCheckerUrgentHolder"); } else if (at == TEST_HERE) { auto db = makeReference>(); std::vector iTesters(1);