Merge pull request #3653 from etschannen/feature-proxy-busy-loadbalance

Changed proxy load balancing to balance on CPU usage
This commit is contained in:
Evan Tschannen 2020-08-31 10:39:08 -07:00 committed by GitHub
commit 5c97461d18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 27 additions and 49 deletions

View File

@@ -191,7 +191,7 @@ struct GetReadVersionReply : public BasicLoadBalancedReply {
// Serializes the reply fields for the wire protocol.
// NOTE(review): the diff here interleaved the removed (recentRequests) and
// added (processBusyTime) serializer lines without +/- markers; serializing
// version/locked/metadataVersion/tagThrottleInfo twice would corrupt the
// wire format. Only the post-change line is kept.
template <class Ar>
void serialize(Ar& ar) {
	serializer(ar, BasicLoadBalancedReply::processBusyTime, version, locked, metadataVersion, tagThrottleInfo);
}
};

View File

@@ -458,8 +458,8 @@ Future< REPLY_TYPE(Request) > loadBalance(
// Subclasses must initialize all members in their default constructors
// Subclasses must serialize all members
// Carries the responder's busyness back to the client so basicLoadBalance()
// can steer traffic toward less-busy alternatives (see updateRecent /
// updateProbabilities). The garbled diff left both the removed
// (recentRequests) and added (processBusyTime) member/constructor pairs in
// place — two default constructors is ill-formed C++ — so only the
// post-change form is kept.
struct BasicLoadBalancedReply {
	// Run-loop busyness fraction of the responding process scaled by 1e6
	// (set in getLiveCommittedVersion); -1/0 means "no sample yet".
	int processBusyTime;
	BasicLoadBalancedReply() : processBusyTime(0) {}
};
Optional<BasicLoadBalancedReply> getBasicLoadBalancedReply(const BasicLoadBalancedReply *reply);
@@ -528,7 +528,7 @@ Future< REPLY_TYPE(Request) > basicLoadBalance(
if(result.present()) {
Optional<BasicLoadBalancedReply> loadBalancedReply = getBasicLoadBalancedReply(&result.get());
if(loadBalancedReply.present()) {
alternatives->updateRecent( useAlt, loadBalancedReply.get().recentRequests );
alternatives->updateRecent( useAlt, loadBalancedReply.get().processBusyTime );
}
return result.get();

View File

@@ -62,10 +62,10 @@ struct AlternativeInfo {
T interf;
double probability;
double cumulativeProbability;
int recentRequests;
int processBusyTime;
double lastUpdate;
AlternativeInfo(T const& interf, double probability, double cumulativeProbability) : interf(interf), probability(probability), cumulativeProbability(cumulativeProbability), recentRequests(-1), lastUpdate(0) {}
AlternativeInfo(T const& interf, double probability, double cumulativeProbability) : interf(interf), probability(probability), cumulativeProbability(cumulativeProbability), processBusyTime(-1), lastUpdate(0) {}
bool operator < (double const& r) const {
return cumulativeProbability < r;
@@ -100,26 +100,28 @@ public:
return std::lower_bound( alternatives.begin(), alternatives.end(), deterministicRandom()->random01() ) - alternatives.begin();
}
void updateRecent( int index, int recentRequests ) {
alternatives[index].recentRequests = recentRequests;
void updateRecent( int index, int processBusyTime ) {
alternatives[index].processBusyTime = processBusyTime;
alternatives[index].lastUpdate = now();
}
void updateProbabilities() {
double totalRequests = 0;
double totalBusyTime = 0;
for(auto& it : alternatives) {
totalRequests += it.recentRequests;
totalBusyTime += it.processBusyTime;
if(now() - it.lastUpdate > FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE/2.0) {
return;
}
}
if(totalRequests < 1000) {
//Do not update probabilities if the average proxy busyness is less than 5%
if(totalBusyTime < FLOW_KNOBS->BASIC_LOAD_BALANCE_MIN_AMOUNT*alternatives.size()) {
return;
}
double totalProbability = 0;
for(auto& it : alternatives) {
it.probability += (1.0/alternatives.size()-(it.recentRequests/totalRequests))*FLOW_KNOBS->BASIC_LOAD_BALANCE_MAX_CHANGE;
it.probability += (1.0/alternatives.size()-(it.processBusyTime/totalBusyTime))*FLOW_KNOBS->BASIC_LOAD_BALANCE_MAX_CHANGE;
it.probability = std::max(it.probability, 1/(FLOW_KNOBS->BASIC_LOAD_BALANCE_MAX_PROB*alternatives.size()));
it.probability = std::min(it.probability, FLOW_KNOBS->BASIC_LOAD_BALANCE_MAX_PROB/alternatives.size());
totalProbability += it.probability;

View File

@@ -96,33 +96,8 @@ struct ProxyStats {
Future<Void> logger;
int recentRequests;
Deque<int> requestBuckets;
double lastBucketBegin;
double bucketInterval;
// Advances the sliding-window request counter: for each full bucketInterval
// that has elapsed, the oldest bucket's count is dropped from recentRequests
// and a fresh zeroed bucket is opened. (This is the diff's removed side —
// the change replaces request counting with CPU-busyness balancing.)
void updateRequestBuckets() {
while(now() - lastBucketBegin > bucketInterval) {
lastBucketBegin += bucketInterval;
// Expire the oldest bucket and start a new empty one.
recentRequests -= requestBuckets.front();
requestBuckets.pop_front();
requestBuckets.push_back(0);
}
}
// Counts one incoming request in both the rolling total and the current
// bucket; buckets are rotated first so the increment lands in the correct
// time window. (Diff's removed side.)
void addRequest() {
updateRequestBuckets();
++recentRequests;
++requestBuckets.back();
}
// Returns the windowed request count extrapolated to a full
// BASIC_LOAD_BALANCE_UPDATE_RATE interval; the denominator compensates for
// the partially elapsed current bucket. (Diff's removed side.)
int getRecentRequests() {
updateRequestBuckets();
return recentRequests*FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE/(FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE-(lastBucketBegin+bucketInterval-now()));
}
explicit ProxyStats(UID id, Version* pVersion, NotifiedVersion* pCommittedVersion, int64_t *commitBatchesMemBytesCountPtr)
: cc("ProxyStats", id.toString()), recentRequests(0), lastBucketBegin(now()), bucketInterval(FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE/FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS),
: cc("ProxyStats", id.toString()),
txnRequestIn("TxnRequestIn", cc), txnRequestOut("TxnRequestOut", cc),
txnRequestErrors("TxnRequestErrors", cc), txnStartIn("TxnStartIn", cc), txnStartOut("TxnStartOut", cc),
txnStartBatch("TxnStartBatch", cc), txnSystemPriorityStartIn("TxnSystemPriorityStartIn", cc),
@@ -148,9 +123,6 @@ struct ProxyStats {
specialCounter(cc, "CommittedVersion", [pCommittedVersion](){ return pCommittedVersion->get(); });
specialCounter(cc, "CommitBatchesMemBytesCount", [commitBatchesMemBytesCountPtr]() { return *commitBatchesMemBytesCountPtr; });
logger = traceCounters("ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ProxyMetrics");
for(int i = 0; i < FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS; i++) {
requestBuckets.push_back(0);
}
}
};
@@ -310,7 +282,6 @@ ACTOR Future<Void> queueTransactionStartRequests(
loop choose{
when(GetReadVersionRequest req = waitNext(readVersionRequests)) {
//WARNING: this code is run at a high priority, so it needs to do as little work as possible
stats->addRequest();
if( stats->txnRequestIn.getValue() - stats->txnRequestOut.getValue() > SERVER_KNOBS->START_TRANSACTION_MAX_QUEUE_SIZE ) {
++stats->txnRequestErrors;
//FIXME: send an error instead of giving an unreadable version when the client can support the error: req.reply.sendError(proxy_memory_limit_exceeded());
@@ -629,7 +600,6 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
choose{
when(CommitTransactionRequest req = waitNext(in)) {
//WARNING: this code is run at a high priority, so it needs to do as little work as possible
commitData->stats.addRequest();
int bytes = getBytes(req);
// Drop requests if memory is under severe pressure
@@ -1439,7 +1409,7 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commi
rep = v;
}
}
rep.recentRequests = commitData->stats.getRecentRequests();
rep.processBusyTime = 1e6 * (g_network->isSimulated() ? deterministicRandom()->random01() : g_network->networkInfo.metrics.lastRunLoopBusyness);
if (debugID.present()) {
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "MasterProxyServer.getLiveCommittedVersion.After");
@@ -1710,7 +1680,6 @@ ACTOR static Future<Void> readRequestServer( MasterProxyInterface proxy, Promise
loop {
GetKeyServerLocationsRequest req = waitNext(proxy.getKeyServersLocations.getFuture());
//WARNING: this code is run at a high priority, so it needs to do as little work as possible
commitData->stats.addRequest();
if(req.limit != CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT && //Always do data distribution requests
commitData->stats.keyServerLocationIn.getValue() - commitData->stats.keyServerLocationOut.getValue() > SERVER_KNOBS->KEY_LOCATION_MAX_QUEUE_SIZE) {
++commitData->stats.keyServerLocationErrors;

View File

@@ -211,10 +211,10 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
init( FUTURE_VERSION_BACKOFF_GROWTH, 2.0 );
init( LOAD_BALANCE_MAX_BAD_OPTIONS, 1 ); //should be the same as MAX_MACHINES_FALLING_BEHIND
init( LOAD_BALANCE_PENALTY_IS_BAD, true );
init( BASIC_LOAD_BALANCE_UPDATE_RATE, 2.0 );
init( BASIC_LOAD_BALANCE_MAX_CHANGE, 0.05 );
init( BASIC_LOAD_BALANCE_UPDATE_RATE, 10.0 ); //should be longer than the rate we log network metrics
init( BASIC_LOAD_BALANCE_MAX_CHANGE, 0.10 );
init( BASIC_LOAD_BALANCE_MAX_PROB, 2.0 );
init( BASIC_LOAD_BALANCE_BUCKETS, 40 );
init( BASIC_LOAD_BALANCE_MIN_AMOUNT, 50000 ); //Will not update probabilities if the average proxy busyness is less than 5%
// Health Monitor
init( FAILURE_DETECTION_DELAY, 4.0 ); if( randomize && BUGGIFY ) FAILURE_DETECTION_DELAY = 1.0;

View File

@@ -233,7 +233,7 @@ public:
double BASIC_LOAD_BALANCE_UPDATE_RATE;
double BASIC_LOAD_BALANCE_MAX_CHANGE;
double BASIC_LOAD_BALANCE_MAX_PROB;
int BASIC_LOAD_BALANCE_BUCKETS;
double BASIC_LOAD_BALANCE_MIN_AMOUNT;
// Health Monitor
int FAILURE_DETECTION_DELAY;

View File

@@ -166,6 +166,7 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
n.detail(format("PriorityBusy%d", itr.first).c_str(), itr.second);
}
bool firstTracker = true;
for (auto &itr : g_network->networkInfo.metrics.starvationTrackers) {
if(itr.active) {
itr.duration += now() - itr.windowedTimer;
@@ -176,6 +177,11 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
n.detail(format("PriorityStarvedBelow%d", itr.priority).c_str(), std::min(currentStats.elapsed, itr.duration));
n.detail(format("PriorityMaxStarvedBelow%d", itr.priority).c_str(), itr.maxDuration);
if(firstTracker) {
g_network->networkInfo.metrics.lastRunLoopBusyness = std::min(currentStats.elapsed, itr.duration)/currentStats.elapsed;
firstTracker = false;
}
itr.duration = 0;
itr.maxDuration = 0;
}

View File

@@ -336,6 +336,7 @@ struct NetworkMetrics {
};
std::unordered_map<TaskPriority, struct PriorityStats> activeTrackers;
double lastRunLoopBusyness;
std::vector<struct PriorityStats> starvationTrackers;
static const std::vector<int> starvationBins;