adjust CPU pivot knobs to hack simulation test

2023-03-28 16:27:56 -07:00 · 2023-03-28 16:27:56 -07:00 · 5648f827a0
parent 990ad26d8b
commit 5648f827a0
6 changed files with 61 additions and 39 deletions
--- a/fdbclient/ServerKnobs.cpp
+++ b/fdbclient/ServerKnobs.cpp
@ -169,12 +169,14 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( PRIORITY_ENFORCE_MOVE_OUT_OF_PHYSICAL_SHARD,           960 ); if( randomize && BUGGIFY ) PRIORITY_ENFORCE_MOVE_OUT_OF_PHYSICAL_SHARD = 360; // Set as the lowest priority

 	// Data distribution
-	init( AVAILABLE_SPACE_PIVOT_PERCENT,                          0.6);
-	init( CPU_PIVOT_PERCENT,                                      0.8);
+	init( AVAILABLE_SPACE_PIVOT_RATIO,                         0.6 );
+	init( CPU_PIVOT_RATIO,                                     0.9 );
 	// In order to make sure GetTeam has enough eligible destination team:
-	ASSERT_GT(AVAILABLE_SPACE_PIVOT_PERCENT + CPU_PIVOT_PERCENT, 1.0 );
-	init( MAX_DEST_CPU_PERCENT, 								98.0 );
-	init( CPU_STABLE_INTERVAL,                                 300.0 );
+	ASSERT_GT(AVAILABLE_SPACE_PIVOT_RATIO + CPU_PIVOT_RATIO, 1.0 );
+	// In simulation, the CPU percent of every storage server is hard-coded to 100.0%, so it is difficult to test pivot CPU in normal simulation. TODO: add a mock DD test case for it.
+	init( MAX_DEST_CPU_PERCENT, 		   isSimulated ? 100.0:	98.0 );
+	init( CPU_STABLE_INTERVAL,            isSimulated ?  0.0 : 300.0 );
+	init( DD_TEAM_PIVOT_UPDATE_DELAY,                            5.0 );

 	init( SHARD_ENCODE_LOCATION_METADATA,                       false ); if( randomize && BUGGIFY )  SHARD_ENCODE_LOCATION_METADATA = true;
 	init( ENABLE_DD_PHYSICAL_SHARD,                             false ); // EXPERIMENTAL; If true, SHARD_ENCODE_LOCATION_METADATA must be true; When true, optimization of data move between DCs is disabled
@ -755,7 +757,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( MIN_AVAILABLE_SPACE_RATIO,                            0.05 );
 	init( MIN_AVAILABLE_SPACE_RATIO_SAFETY_BUFFER,              0.01 );
 	init( TARGET_AVAILABLE_SPACE_RATIO,                         0.30 );
-	init( AVAILABLE_SPACE_UPDATE_DELAY,                          5.0 );

 	init( MAX_TL_SS_VERSION_DIFFERENCE,                         1e99 ); // if( randomize && BUGGIFY ) MAX_TL_SS_VERSION_DIFFERENCE = std::max(1.0, 0.25 * VERSIONS_PER_SECOND); // spring starts at half this value //FIXME: this knob causes ratekeeper to clamp on idle cluster in simulation that have a large number of logs
 	init( MAX_TL_SS_VERSION_DIFFERENCE_BATCH,                   1e99 );
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@ -190,14 +190,17 @@ public:
 	// Data distribution
 	// DD won't move shard to teams that has availableSpaceRatio < max(0.05,  AllTeamAvailSpaceRatio[pivot]), where
 	// pivot = pivot percent * team count.
-	double AVAILABLE_SPACE_PIVOT_PERCENT;
-	// DD won't move shard to teams that has CPU >= AllTeamCPU[pivot], where pivot = pivot percent *
+	double AVAILABLE_SPACE_PIVOT_RATIO;
+	// DD won't move shards to teams that have CPU > AllTeamCPU[pivot], where pivot = CPU_PIVOT_RATIO *
 	// team count.
-	double CPU_PIVOT_PERCENT;
-	// DD won't move shard to teams that has CPU >= MAX_DEST_CPU_PERCENT
+	double CPU_PIVOT_RATIO;
+	// DD won't move shards to teams that have CPU > MAX_DEST_CPU_PERCENT
 	double MAX_DEST_CPU_PERCENT;
-	// DD only move shard to teams that has CPU < pivot CPU for enough time
+	// DD only moves shards to teams that have had CPU <= pivot CPU for long enough
 	double CPU_STABLE_INTERVAL;
+	// The fixed interval at which DD updates the pivot values used for team selection. It should be >=
+	// min(STORAGE_METRICS_POLLING_DELAY, DETAILED_METRIC_UPDATE_RATE); otherwise the pivot won't change.
+	double DD_TEAM_PIVOT_UPDATE_DELAY;

 	bool SHARD_ENCODE_LOCATION_METADATA; // If true, location metadata will contain shard ID.
 	bool ENABLE_DD_PHYSICAL_SHARD; // EXPERIMENTAL; If true, SHARD_ENCODE_LOCATION_METADATA must be true.
@ -664,6 +667,7 @@ public:
 	double SMOOTHING_AMOUNT;
 	double SLOW_SMOOTHING_AMOUNT;
 	double METRIC_UPDATE_RATE;
+	// The interval at which detailed HealthMetrics are pushed to GRV proxies
 	double DETAILED_METRIC_UPDATE_RATE;
 	double LAST_LIMITED_RATIO;
 	double RATEKEEPER_DEFAULT_LIMIT;
@ -745,7 +749,6 @@ public:
 	double MIN_AVAILABLE_SPACE_RATIO;
 	double MIN_AVAILABLE_SPACE_RATIO_SAFETY_BUFFER;
 	double TARGET_AVAILABLE_SPACE_RATIO;
-	double AVAILABLE_SPACE_UPDATE_DELAY;

 	double MAX_TL_SS_VERSION_DIFFERENCE; // spring starts at half this value
 	double MAX_TL_SS_VERSION_DIFFERENCE_BATCH;
--- a/fdbserver/DDTeamCollection.actor.cpp
+++ b/fdbserver/DDTeamCollection.actor.cpp
@ -339,7 +339,10 @@ public:
 				if (randomTeams.empty() && !self->zeroHealthyTeams->get()) {
 					self->bestTeamKeepStuckCount++;
 					if (g_network->isSimulated()) {
-						TraceEvent(SevWarn, "GetTeamReturnEmpty").detail("HealthyTeams", self->healthyTeamCount);
+						TraceEvent(SevWarn, "GetTeamReturnEmpty")
+						    .detail("HealthyTeams", self->healthyTeamCount)
+						    .detail("PivotCPU", self->pivotCPU)
+						    .detail("PivotDiskSpace", self->pivotAvailableSpaceRatio);
 					}
 				} else {
 					self->bestTeamKeepStuckCount = 0;
@ -374,10 +377,14 @@ public:
 					return Void();
 				}
 			}
-			// if (!bestOption.present()) {
-			// 	TraceEvent("GetTeamRequest").detail("Request", req.getDesc());
-			// 	self->traceAllInfo(true);
-			// }
+			if (!bestOption.present()) {
+				TraceEvent("GetTeamRequestDebug")
+				    .detail("Request", req.getDesc())
+				    .detail("HealthyTeams", self->healthyTeamCount)
+				    .detail("PivotCPU", self->pivotCPU)
+				    .detail("PivotDiskSpace", self->pivotAvailableSpaceRatio);
+				self->traceAllInfo(true);
+			}

 			req.reply.send(std::make_pair(bestOption, foundSrc));
 			return Void();
@ -3246,7 +3253,7 @@ public:
 }; // class DDTeamCollectionImpl

 void DDTeamCollection::updateTeamPivotValues() {
-	if (now() - lastPivotValuesUpdate > SERVER_KNOBS->AVAILABLE_SPACE_UPDATE_DELAY) {
+	if (now() - lastPivotValuesUpdate > SERVER_KNOBS->DD_TEAM_PIVOT_UPDATE_DELAY) {
 		lastPivotValuesUpdate = now();
 		std::vector<double> teamAvailableSpace;
 		std::vector<std::pair<double, int>> teamAverageCPU_index;
@ -3256,11 +3263,12 @@ void DDTeamCollection::updateTeamPivotValues() {
 			if (teams[i]->isHealthy()) {
 				teamAvailableSpace.push_back(teams[i]->getMinAvailableSpaceRatio());
 				teamAverageCPU_index.emplace_back(teams[i]->getAverageCPU(), i);
+				minTeamAvgCPU = std::min(minTeamAvgCPU, teamAverageCPU_index.back().first);
 			}
 		}

-		size_t pivot = teamAvailableSpace.size() * std::min(1.0, SERVER_KNOBS->AVAILABLE_SPACE_PIVOT_PERCENT);
-		size_t cpuPivotIndex = teamAverageCPU_index.size() * std::min(1.0, SERVER_KNOBS->CPU_PIVOT_PERCENT);
+		size_t pivot = teamAvailableSpace.size() * std::min(1.0, SERVER_KNOBS->AVAILABLE_SPACE_PIVOT_RATIO);
+		size_t cpuPivotIndex = teamAverageCPU_index.size() * std::min(1.0, SERVER_KNOBS->CPU_PIVOT_RATIO);
 		if (teamAvailableSpace.size() > 1) {
 			std::nth_element(teamAvailableSpace.begin(), teamAvailableSpace.begin() + pivot, teamAvailableSpace.end());
 			pivotAvailableSpaceRatio =
@ -3269,15 +3277,11 @@ void DDTeamCollection::updateTeamPivotValues() {

 			std::nth_element(
 			    teamAverageCPU_index.begin(), teamAverageCPU_index.begin() + cpuPivotIndex, teamAverageCPU_index.end());
-			pivotCPU = std::min(SERVER_KNOBS->MAX_DEST_CPU_PERCENT, teamAverageCPU_index[cpuPivotIndex].first);
-			// set high CPU for teams >= pivot CPU
-			for (int i = cpuPivotIndex; i < teamAverageCPU_index.size(); ++i) {
+			pivotCPU = teamAverageCPU_index[cpuPivotIndex].first;
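+			// record the high-CPU time for teams placed after the pivot index (nth_element only guarantees CPU >= pivot CPU there, so ties may be included)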
+			for (int i = cpuPivotIndex + 1; i < teamAverageCPU_index.size(); ++i) {
 				teams[teamAverageCPU_index[i].second]->setLastHighCPUTime(lastPivotValuesUpdate);
 			}
-			for (int i = cpuPivotIndex - 1; i >= 0 && teamAverageCPU_index[i].first >= pivotCPU; --i) {
-				teams[teamAverageCPU_index[i].second]->setLastHighCPUTime(lastPivotValuesUpdate);
-			}
-
 		} else {
 			pivotAvailableSpaceRatio = SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO;
 			pivotCPU = SERVER_KNOBS->MAX_DEST_CPU_PERCENT;
@ -3290,6 +3294,13 @@ void DDTeamCollection::updateTeamPivotValues() {
 			    .detail("Primary", primary);
 			printDetailedTeamsInfo.trigger();
 		}
+
+		if (pivotCPU > SERVER_KNOBS->MAX_DEST_CPU_PERCENT) {
+			TraceEvent(SevWarnAlways, "DDTeamPivotCPUTooHigh", distributorId)
+			    .detail("PivotCPU", pivotCPU)
+			    .detail("MinTeamAvgCPU", minTeamAvgCPU)
+			    .detail("Primary", primary);
+		}
 	}
 }

@ -5768,7 +5779,7 @@ public:
 		state GetTeamRequest req(TeamSelect::WANT_COMPLETE_SRCS,
 		                         PreferLowerDiskUtil::True,
 		                         TeamMustHaveShards::False,
-		                         PreferLowerReadUtil::True);
+		                         PreferLowerReadUtil::False);
 		req.completeSources = completeSources;

 		wait(collection->getTeam(req));
@ -5822,7 +5833,7 @@ public:
 		state GetTeamRequest req(TeamSelect::WANT_COMPLETE_SRCS,
 		                         PreferLowerDiskUtil::True,
 		                         TeamMustHaveShards::False,
-		                         PreferLowerReadUtil::True);
+		                         PreferLowerReadUtil::False);
 		req.completeSources = completeSources;

 		wait(collection->getTeam(req));
@ -5874,7 +5885,7 @@ public:
 		state GetTeamRequest req(TeamSelect::WANT_TRUE_BEST,
 		                         PreferLowerDiskUtil::True,
 		                         TeamMustHaveShards::False,
-		                         PreferLowerReadUtil::True);
+		                         PreferLowerReadUtil::False);
 		req.completeSources = completeSources;

 		wait(collection->getTeam(req));
@ -5978,7 +5989,7 @@ public:
 		state GetTeamRequest req(TeamSelect::WANT_TRUE_BEST,
 		                         PreferLowerDiskUtil::True,
 		                         TeamMustHaveShards::False,
-		                         PreferLowerReadUtil::True);
+		                         PreferLowerReadUtil::False);
 		req.completeSources = completeSources;

 		wait(collection->getTeam(req));
@ -6036,7 +6047,7 @@ public:
 		state GetTeamRequest req(TeamSelect::WANT_TRUE_BEST,
 		                         PreferLowerDiskUtil::True,
 		                         TeamMustHaveShards::False,
-		                         PreferLowerReadUtil::True);
+		                         PreferLowerReadUtil::False);
 		req.completeSources = completeSources;

 		wait(collection->getTeam(req));
@ -6099,13 +6110,15 @@ public:
 		std::set<UID> expectedServers{ UID(4, 0) };
 		std::set<UID> expectedServersHigh{ UID(5, 0) };

-		ASSERT(resTeam.present() && resTeamHigh.present());
+		ASSERT(resTeam.present());
+		ASSERT(resTeamHigh.present());
 		auto servers = resTeam.get()->getServerIDs(), serversHigh = resTeamHigh.get()->getServerIDs();
 		const std::set<UID> selectedServers(servers.begin(), servers.end()),
 		    selectedServersHigh(serversHigh.begin(), serversHigh.end());
 		// for (auto id : selectedServers)
 		// 	std::cout << id.toString() << std::endl;
-		ASSERT(expectedServers == selectedServers && expectedServersHigh == selectedServersHigh);
+		ASSERT(expectedServers == selectedServers);
+		ASSERT(expectedServersHigh == selectedServersHigh);

 		resTeam.get()->addReadInFlightToTeam(50);
 		req.reply.reset();
@ -6152,7 +6165,7 @@ public:
 		state GetTeamRequest req(TeamSelect::WANT_TRUE_BEST,
 		                         PreferLowerDiskUtil::True,
 		                         TeamMustHaveShards::False,
-		                         PreferLowerReadUtil::True);
+		                         PreferLowerReadUtil::False);
 		req.completeSources = completeSources;

 		wait(collection->getTeam(req));
@ -6167,6 +6180,8 @@ public:

 		return Void();
 	}
+
+	ACTOR static Future<Void> GetTeam_CpuUtilSelection() { return Void(); }
 };

 TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") {
@ -6228,6 +6243,7 @@ TEST_CASE("/DataDistribution/GetTeam/ServerUtilizationNearCutoff") {
 	wait(DDTeamCollectionUnitTest::GetTeam_ServerUtilizationNearCutoff());
 	return Void();
 }
+
 TEST_CASE("/DataDistribution/GetTeam/TrueBestLeastReadBandwidth") {
 	wait(DDTeamCollectionUnitTest::GetTeam_TrueBestLeastReadBandwidth());
 	return Void();
--- a/fdbserver/TCInfo.actor.cpp
+++ b/fdbserver/TCInfo.actor.cpp
@ -423,8 +423,8 @@ double TCTeamInfo::getAverageCPU() const {
 			size++;
 		}
 	}
-	// If every storage server hasn't gotten their CPU updated, we assume they are too busy to respond so return 101;
-	return size == 0 ? 101.0 : sum / size;
+	// If no storage server has had its CPU updated, we assume they are all too busy to respond, so return 100.0.
+	return size == 0 ? 100.0 : sum / size;
 }

 int64_t TCTeamInfo::getMinAvailableSpace(bool includeInFlight) const {
--- a/fdbserver/include/fdbserver/DDTeamCollection.h
+++ b/fdbserver/include/fdbserver/DDTeamCollection.h
@ -281,7 +281,8 @@ protected:
 	Future<bool> clearHealthyZoneFuture;
 	double pivotAvailableSpaceRatio;
 	double lastPivotValuesUpdate;
-	double pivotCPU;
+	double pivotCPU = 0.0;
+	double minTeamAvgCPU = 101.0;

 	int lowestUtilizationTeam;
 	int highestUtilizationTeam;
--- a/fdbserver/include/fdbserver/TCInfo.h
+++ b/fdbserver/include/fdbserver/TCInfo.h
@ -225,7 +225,8 @@ public:
 	void setLastHighCPUTime(double time) override { lastHighCPUTime = time; }

 	bool hasLowCpuFor(double cpuThreshold, double duration) const override {
-		return getAverageCPU() < cpuThreshold && now() - lastHighCPUTime >= duration;
+		return getAverageCPU() <= std::min(cpuThreshold, SERVER_KNOBS->MAX_DEST_CPU_PERCENT) &&
+		       now() - lastHighCPUTime >= duration;
 	}

 	int64_t getReadInFlightToTeam() const override;