foundationdb/fdbserver/Status.actor.cpp

/*
* Status.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Status.h"
#include "flow/actorcompiler.h"
#include "flow/Trace.h"
#include "fdbclient/NativeAPI.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/ReadYourWrites.h"
#include "WorkerInterface.h"
#include "ClusterRecruitmentInterface.h"
#include <time.h>
#include "CoordinationInterface.h"
#include "DataDistribution.h"
#include "flow/UnitTest.h"
#include "QuietDatabase.h"
#include "RecoveryState.h"
const char* RecoveryStatus::names[] = {
"reading_coordinated_state", "locking_coordinated_state", "locking_old_transaction_servers", "reading_transaction_system_state",
"configuration_missing", "configuration_never_created", "configuration_invalid",
"recruiting_transaction_servers", "initializing_transaction_servers", "recovery_transaction",
"writing_coordinated_state", "fully_recovered"
};
static_assert( sizeof(RecoveryStatus::names) == sizeof(RecoveryStatus::names[0])*RecoveryStatus::END, "RecoveryStatus::names[] size" );
const char* RecoveryStatus::descriptions[] = {
// reading_coordinated_state
"Requesting information from coordination servers. Verify that a majority of coordination server processes are active.",
// locking_coordinated_state
"Locking coordination state. Verify that a majority of coordination server processes are active.",
// locking_old_transaction_servers
"Locking old transaction servers. Verify that a least one transaction server from the previous generation is running.",
// reading_transaction_system_state
"Recovering transaction server state. Verify that the transaction server processes are active.",
// configuration_missing
"There appears to be a database, but its configuration does not appear to be initialized.",
// configuration_never_created
"The coordinator(s) have no record of this database. Either the coordinator addresses are incorrect, the coordination state on those machines is missing, or no database has been created.",
// configuration_invalid
"The database configuration is invalid. Set a new, valid configuration to recover the database.",
// recruiting_transaction_servers
"Recruiting new transaction servers.",
// initializing_transaction_servers
"Initializing new transaction servers and recovering transaction logs.",
// recovery_transaction
"Performing recovery transaction.",
// writing_coordinated_state
"Writing coordinated state. Verify that a majority of coordination server processes are active.",
// fully_recovered
"Recovery complete."
};
static_assert( sizeof(RecoveryStatus::descriptions) == sizeof(RecoveryStatus::descriptions[0])*RecoveryStatus::END, "RecoveryStatus::descriptions[] size" );
// From Ratekeeper.actor.cpp
extern int limitReasonEnd;
extern const char* limitReasonName[];
extern const char* limitReasonDesc[];
// Returns -1 if it fails to find a quoted string at the start of xml; otherwise returns the position just past the closing quote
// If decoded is not NULL, writes the decoded attribute value there
int decodeQuotedAttributeValue( StringRef xml, std::string* decoded ) {
if (decoded) decoded->clear();
if (!xml.size() || xml[0] != '"') return -1;
int pos = 1;
loop {
if (pos == xml.size()) return -1; // No closing quote
if (xml[pos]=='"') { pos++; break; } // Success
uint8_t out = xml[pos];
if (xml[pos] == '&') {
if (xml.substr(pos).startsWith(LiteralStringRef("&amp;"))) { out = '&'; pos += 5; }
else if (xml.substr(pos).startsWith(LiteralStringRef("&lt;"))) { out = '<'; pos += 4; }
else if (xml.substr(pos).startsWith(LiteralStringRef("&quot;"))) { out = '"'; pos += 6; }
else return -1;
} else
pos++;
if (decoded) decoded->push_back(out);
}
return pos;
}
// return false on failure; outputs decoded attribute value to `ret`
bool tryExtractAttribute( StringRef expanded, StringRef attributeToExtract, std::string& ret ) {
// This is only expected to parse the XML that Trace.cpp actually generates; we haven't looked at the standard to even find out what it doesn't try to do
int pos = 0;
// Consume '<'
if (pos == expanded.size() || expanded[pos] != '<') return false;
pos++;
// Consume tag name
while (pos != expanded.size() && expanded[pos] != ' ' && expanded[pos] != '/' && expanded[pos] != '>') pos++;
while (pos != expanded.size() && expanded[pos] != '>' && expanded[pos] != '/') {
// Consume whitespace
while (pos != expanded.size() && expanded[pos] == ' ') pos++;
// We should be looking at an attribute or the end of the string; find '=' at the end of the attribute, if any
int eq_or_end = pos;
while (eq_or_end != expanded.size() && expanded[eq_or_end]!='=' && expanded[eq_or_end]!='>') eq_or_end++;
if ( expanded.substr(pos, eq_or_end-pos) == attributeToExtract ) {
// Found the attribute we want; decode the value
int end = decodeQuotedAttributeValue(expanded.substr(eq_or_end+1), &ret);
if (end<0) { ret.clear(); return false; }
return true;
}
// We don't want this attribute, but we need to skip over its value
// It looks like this *could* just be a scan for '"' characters
int end = decodeQuotedAttributeValue(expanded.substr(eq_or_end+1), NULL);
if (end<0) return false;
pos = (eq_or_end+1)+end;
}
return false;
}
// Throws attribute_not_found if the key is not found
std::string extractAttribute( StringRef expanded, StringRef attributeToExtract ) {
std::string ret;
if (!tryExtractAttribute(expanded, attributeToExtract, ret))
throw attribute_not_found();
return ret;
}
std::string extractAttribute( std::string const& expanded, std::string const& attributeToExtract ) {
return extractAttribute(StringRef(expanded), StringRef(attributeToExtract));
}
TEST_CASE("fdbserver/Status/extractAttribute/basic") {
std::string a;
ASSERT( tryExtractAttribute(
LiteralStringRef("<Foo A=\"&quot;a&quot;\" B=\"\" />"),
LiteralStringRef("A"),
a) && a == LiteralStringRef("\"a\""));
ASSERT( tryExtractAttribute(
LiteralStringRef("<Foo A=\"&quot;a&quot;\" B=\"\\\" />"),
LiteralStringRef("B"),
a) && a == LiteralStringRef("\\") );
ASSERT( tryExtractAttribute(
LiteralStringRef("<Event Severity=\"10\" Time=\"1415124565.129695\" Type=\"ProgramStart\" Machine=\"10.0.0.85:6863\" ID=\"0000000000000000\" RandomSeed=\"-2044671207\" SourceVersion=\"675cd9579467+ tip\" Version=\"3.0.0-PRERELEASE\" PackageName=\"3.0\" DataFolder=\"\" ConnectionString=\"circus:81060aa85f0a5b5b@10.0.0.5:4000,10.0.0.17:4000,10.0.0.78:4000,10.0.0.162:4000,10.0.0.182:4000\" ActualTime=\"1415124565\" CommandLine=\"fdbserver -r multitest -p auto:6863 -f /tmp/circus/testspec.txt --num_testers 24 --logdir /tmp/circus/multitest\" BuggifyEnabled=\"0\"/>"),
LiteralStringRef("Version"),
a) && a == LiteralStringRef("3.0.0-PRERELEASE") );
ASSERT( !tryExtractAttribute(
LiteralStringRef("<Event Severity=\"10\" Time=\"1415124565.129695\" Type=\"ProgramStart\" Machine=\"10.0.0.85:6863\" ID=\"0000000000000000\" RandomSeed=\"-2044671207\" SourceVersion=\"675cd9579467+ tip\" Version=\"3.0.0-PRERELEASE\" PackageName=\"3.0\" DataFolder=\"\" ConnectionString=\"circus:81060aa85f0a5b5b@10.0.0.5:4000,10.0.0.17:4000,10.0.0.78:4000,10.0.0.162:4000,10.0.0.182:4000\" ActualTime=\"1415124565\" CommandLine=\"fdbserver -r multitest -p auto:6863 -f /tmp/circus/testspec.txt --num_testers 24 --logdir /tmp/circus/multitest\" BuggifyEnabled=\"0\"/>"),
LiteralStringRef("ersion"),
a) );
return Void();
}
TEST_CASE("fdbserver/Status/extractAttribute/fuzz") {
// This is just looking for anything that crashes or loops forever
std::string out;
for(int i=0; i<100000; i++)
{
std::string s = "<Event Severity=\"10\" Time=\"1415124565.129695\" Type=\"Program &quot;Start&quot;\" Machine=\"10.0.0.85:6863\" ID=\"0000000000000000\" RandomSeed=\"-2044671207\" SourceVersion=\"675cd9579467+ tip\" Version=\"3.0.0-PRERELEASE\" PackageName=\"3.0\" DataFolder=\"\" ConnectionString=\"circus:81060aa85f0a5b5b@10.0.0.5:4000,10.0.0.17:4000,10.0.0.78:4000,10.0.0.162:4000,10.0.0.182:4000\" ActualTime=\"1415124565\" CommandLine=\"fdbserver -r multitest -p auto:6863 -f /tmp/circus/testspec.txt --num_testers 24 --logdir /tmp/circus/multitest\" BuggifyEnabled=\"0\"/>";
s[ g_random->randomInt(0, s.size()) ] = g_random->randomChoice(LiteralStringRef("\" =q0\\&"));
tryExtractAttribute(s, LiteralStringRef("Version"), out);
}
return Void();
}
struct WorkerEvents : std::map<NetworkAddress, std::string> {};
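// Requests the latest trace event of the given type from a single worker, with a 2 second timeout.
// An empty eventName requests the worker's most recent error event instead (see latestErrorOnWorkers below).
// Returns an empty Optional if the request fails or times out.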
ACTOR static Future< Optional<std::string> > latestEventOnWorker(WorkerInterface worker, std::string eventName) {
try {
EventLogRequest req = eventName.size() > 0 ? EventLogRequest(Standalone<StringRef>(eventName)) : EventLogRequest();
ErrorOr<Standalone<StringRef>> eventTrace = wait( errorOr(timeoutError(worker.eventLogRequest.getReply(req), 2.0)));
if (eventTrace.isError()){
return Optional<std::string>();
}
return eventTrace.get().toString();
}
catch (Error &e){
if (e.code() == error_code_actor_cancelled)
throw;
return Optional<std::string>();
}
}
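// Requests the latest trace event of the given type from every worker in parallel. Returns a map from worker
// address to the raw event text (empty on failure) and the set of addresses that failed to respond in time.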
ACTOR static Future< Optional< std::pair<WorkerEvents, std::set<std::string>> > > latestEventOnWorkers(std::vector<std::pair<WorkerInterface, ProcessClass>> workers, std::string eventName) {
try {
state vector<Future<ErrorOr<Standalone<StringRef>>>> eventTraces;
for (int c = 0; c < workers.size(); c++) {
EventLogRequest req = eventName.size() > 0 ? EventLogRequest(Standalone<StringRef>(eventName)) : EventLogRequest();
eventTraces.push_back(errorOr(timeoutError(workers[c].first.eventLogRequest.getReply(req), 2.0)));
}
Void _ = wait(waitForAll(eventTraces));
std::set<std::string> failed;
WorkerEvents results;
for (int i = 0; i < eventTraces.size(); i++) {
ErrorOr<Standalone<StringRef>> v = eventTraces[i].get();
if (v.isError()){
failed.insert(workers[i].first.address().toString());
results[workers[i].first.address()] = "";
}
else {
results[workers[i].first.address()] = v.get().toString();
}
}
std::pair<WorkerEvents, std::set<std::string>> val;
val.first = results;
val.second = failed;
return val;
}
catch (Error &e){
ASSERT(e.code() == error_code_actor_cancelled); // All other errors should have been filtered by the errorOr actor above
throw;
}
}
static Future< Optional< std::pair<WorkerEvents, std::set<std::string>> > > latestErrorOnWorkers(std::vector<std::pair<WorkerInterface, ProcessClass>> workers) {
return latestEventOnWorkers( workers, "" );
}
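// Linear search for the worker with the given network address; returns an empty Optional if none matches.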
static Optional<std::pair<WorkerInterface, ProcessClass>> getWorker(std::vector<std::pair<WorkerInterface, ProcessClass>> const& workers, NetworkAddress const& address) {
try {
for (int c = 0; c < workers.size(); c++)
if (address == workers[c].first.address())
return workers[c];
return Optional<std::pair<WorkerInterface, ProcessClass>>();
}
catch (Error &e){
return Optional<std::pair<WorkerInterface, ProcessClass>>();
}
}
static Optional<std::pair<WorkerInterface, ProcessClass>> getWorker(std::map<NetworkAddress, std::pair<WorkerInterface, ProcessClass>> const& workersMap, NetworkAddress const& address) {
auto itr = workersMap.find(address);
if(itr == workersMap.end()) {
return Optional<std::pair<WorkerInterface, ProcessClass>>();
}
return itr->second;
}
static StatusObject makeCounter(double hz=0.0, double r=0.0, int64_t c=0) {
StatusObject out;
out["hz"] = hz;
out["roughness"] = r;
out["counter"] = c;
return out;
}
static StatusObject parseCounter(std::string const& s) {
// Parse what traceCounters() in Stats.actor.cpp formats
double hz = 0.0, roughness = 0.0;
long long counter = 0;
sscanf(s.c_str(), "%lf %lf %lld", &hz, &roughness, &counter);
return makeCounter(hz, roughness, counter);
}
static StatusObject addCounters(StatusObject c1, StatusObject c2) {
// "add" the given counter objects. Roughness is averaged weighted by rate.
double c1hz = c1["hz"].get_real();
double c2hz = c2["hz"].get_real();
double c1r = c1["roughness"].get_real();
double c2r = c2["roughness"].get_real();
double c1c = c1["counter"].get_real();
double c2c = c2["counter"].get_real();
return makeCounter(
c1hz+c2hz,
(c1hz + c2hz) ? (c1r*c1hz + c2r*c2hz) / (c1hz + c2hz) : 0.0,
c1c+c2c
);
}
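// The parse* helpers below convert numeric attribute strings extracted from trace events into typed values.
// They throw attribute_not_found() if the string does not parse (or attribute_too_large() if a value does not
// fit in an int); with permissive=true, trailing characters after the number are ignored.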
static double parseDouble(std::string const& s, bool permissive = false) {
double d = 0;
int consumed = 0;
int r = sscanf(s.c_str(), "%lf%n", &d, &consumed);
if (r == 1 && (consumed == s.size() || permissive))
return d;
throw attribute_not_found();
}
static int parseInt(std::string const& s, bool permissive = false) {
long long int iLong = 0;
int consumed = 0;
int r = sscanf(s.c_str(), "%lld%n", &iLong, &consumed);
if (r == 1 && (consumed == s.size() || permissive)){
if (std::numeric_limits<int>::min() <= iLong && iLong <= std::numeric_limits<int>::max())
return (int)iLong; // Downcast definitely safe
else
throw attribute_too_large();
}
throw attribute_not_found();
}
static int64_t parseInt64(std::string const& s, bool permissive = false) {
long long int i = 0;
int consumed = 0;
int r = sscanf(s.c_str(), "%lld%n", &i, &consumed);
if (r == 1 && (consumed == s.size() || permissive))
return i;
throw attribute_not_found();
}
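// Converts a worker's LocalityData into a JSON object mapping each locality key to its value (or null if unset).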
static StatusObject getLocalityInfo(const LocalityData& locality) {
StatusObject localityObj;
for(auto it = locality._data.begin(); it != locality._data.end(); it++) {
if(it->second.present()) {
localityObj[it->first.toString()] = it->second.get().toString();
}
else {
localityObj[it->first.toString()] = json_spirit::mValue();
}
}
return localityObj;
}
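// Builds a status message object (time, type, name, description, raw log message) from a worker's latest error
// trace event. Parse failures are logged and swallowed, returning whatever fields were successfully extracted.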
static StatusObject getError(std::string error) {
StatusObject statusObj;
try {
if (error.size()) {
double time = atof(extractAttribute(error, "Time").c_str());
statusObj["time"] = time;
statusObj["raw_log_message"] = error;
std::string type = extractAttribute(error, "Type");
statusObj["type"] = type;
std::string description = type;
std::string errorName;
if (tryExtractAttribute(error, LiteralStringRef("Error"), errorName)) {
statusObj["name"] = errorName;
description += ": " + errorName;
}
else
statusObj["name"] = "process_error";
struct tm* timeinfo;
time_t t = (time_t)time;
timeinfo = localtime(&t);
char buffer[128];
strftime(buffer, 128, "%c", timeinfo);
description += " at " + std::string(buffer);
statusObj["description"] = description;
}
}
catch (Error &e){
TraceEvent(SevError, "StatusGetErrorError").error(e).detail("RawError", error);
}
return statusObj;
}
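// Builds the "machines" section of status: one entry per physical machine, aggregating memory, CPU and network
// metrics from each machine's metrics event. Parse failures are reported through incomplete_reasons.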
static StatusObject machineStatusFetcher(WorkerEvents mMetrics, vector<std::pair<WorkerInterface, ProcessClass>> workers, Optional<DatabaseConfiguration> configuration, std::set<std::string> *incomplete_reasons) {
StatusObject machineMap;
double metric;
int failed = 0;
// map from machine networkAddress to datacenter ID
WorkerEvents dcIds;
std::map<NetworkAddress, LocalityData> locality;
for (auto worker : workers){
locality[worker.first.address()] = worker.first.locality;
if (worker.first.locality.dcId().present())
dcIds[worker.first.address()] = worker.first.locality.dcId().get().printable();
}
for(auto it = mMetrics.begin(); it != mMetrics.end(); it++) {
if (!it->second.size()){
continue;
}
StatusObject statusObj; // Represents the status for a machine
std::string event = it->second;
try {
std::string address = toIPString(it->first.ip);
// We will use the "physical" caluculated machine ID here to limit exposure to machineID repurposing
std::string machineId = extractAttribute(event, "MachineID");
// If this machine ID does not already exist in the machineMap, add it
if (!machineMap.count(machineId)) {
statusObj["machine_id"] = machineId;
if (dcIds.count(it->first)){
statusObj["datacenter_id"] = dcIds[it->first];
}
if(locality.count(it->first)) {
statusObj["locality"] = getLocalityInfo(locality[it->first]);
}
statusObj["address"] = address;
StatusObject memoryObj;
metric = parseDouble(extractAttribute(event, "TotalMemory"));
memoryObj["total_bytes"] = metric;
metric = parseDouble(extractAttribute(event, "CommittedMemory"));
memoryObj["committed_bytes"] = metric;
metric = parseDouble(extractAttribute(event, "AvailableMemory"));
memoryObj["free_bytes"] = metric;
statusObj["memory"] = memoryObj;
StatusObject cpuObj;
metric = parseDouble(extractAttribute(event, "CPUSeconds"));
double cpu_seconds = metric;
metric = parseDouble(extractAttribute(event, "Elapsed"));
double elapsed = metric;
if (elapsed > 0){
cpuObj["logical_core_utilization"] = std::max(0.0, std::min(cpu_seconds / elapsed, 1.0));
}
statusObj["cpu"] = cpuObj;
StatusObject networkObj;
metric = parseDouble(extractAttribute(event, "MbpsSent"));
StatusObject megabits_sent;
megabits_sent["hz"] = metric;
networkObj["megabits_sent"] = megabits_sent;
metric = parseDouble(extractAttribute(event, "MbpsReceived"));
StatusObject megabits_received;
megabits_received["hz"] = metric;
networkObj["megabits_received"] = megabits_received;
metric = parseDouble(extractAttribute(event, "RetransSegs"));
StatusObject retransSegsObj;
if (elapsed > 0){
retransSegsObj["hz"] = metric / elapsed;
}
networkObj["tcp_segments_retransmitted"] = retransSegsObj;
statusObj["network"] = networkObj;
if (configuration.present()){
statusObj["excluded"] = true; // Will be set to false below if this or any later process is not excluded
}
statusObj["contributing_workers"] = 0;
machineMap[machineId] = statusObj;
}
if (configuration.present() && !configuration.get().isExcludedServer(it->first))
machineMap[machineId].get_obj()["excluded"] = false;
machineMap[machineId].get_obj()["contributing_workers"] = machineMap[machineId].get_obj()["contributing_workers"].get_int() + 1;
}
catch (Error& e) {
++failed;
}
}
if(failed > 0)
incomplete_reasons->insert("Cannot retrieve all machine status information.");
return machineMap;
}
struct MachineMemoryInfo {
double memoryUsage;
double numProcesses;
MachineMemoryInfo() : memoryUsage(0), numProcesses(0) {}
bool valid() { return memoryUsage >= 0; }
void invalidate() { memoryUsage = -1; }
};
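// Accumulates role status objects (master, proxy, log, storage, resolver, ...) keyed by the address of the
// process running the role, so that they can later be attached to the corresponding process entries.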
struct RolesInfo {
std::multimap<NetworkAddress, StatusObject> roles;
StatusObject& addRole( NetworkAddress address, std::string const& role, UID id) {
StatusObject obj;
obj["id"] = id.shortString();
obj["role"] = role;
return roles.insert( make_pair(address, obj ))->second;
}
StatusObject& addRole(std::string const& role, StorageServerInterface& iface, std::string const& metrics, Version maxTLogVersion) {
StatusObject obj;
obj["id"] = iface.id().shortString();
obj["role"] = role;
try {
obj["stored_bytes"] = parseInt64(extractAttribute(metrics, "bytesStored"));
obj["kvstore_used_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesUsed"));
obj["kvstore_free_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesFree"));
obj["kvstore_available_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesAvailable"));
obj["kvstore_total_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesTotal"));
obj["input_bytes"] = parseCounter(extractAttribute(metrics, "bytesInput"));
obj["durable_bytes"] = parseCounter(extractAttribute(metrics, "bytesDurable"));
obj["query_queue_max"] = parseInt(extractAttribute(metrics, "QueryQueueMax"));
obj["finished_queries"] = parseCounter(extractAttribute(metrics, "finishedQueries"));
Version version = parseInt64(extractAttribute(metrics, "version"));
obj["data_version"] = version;
if(maxTLogVersion > 0) {
obj["data_version_lag"] = std::max<Version>(0, maxTLogVersion - version);
}
} catch (Error& e) {
if(e.code() != error_code_attribute_not_found)
throw e;
}
return roles.insert( make_pair(iface.address(), obj ))->second;
}
StatusObject& addRole(std::string const& role, TLogInterface& iface, std::string const& metrics) {
StatusObject obj;
obj["id"] = iface.id().shortString();
obj["role"] = role;
try {
obj["kvstore_used_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesUsed"));
obj["kvstore_free_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesFree"));
obj["kvstore_available_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesAvailable"));
obj["kvstore_total_bytes"] = parseInt64(extractAttribute(metrics, "kvstoreBytesTotal"));
obj["queue_disk_used_bytes"] = parseInt64(extractAttribute(metrics, "queueDiskBytesUsed"));
obj["queue_disk_free_bytes"] = parseInt64(extractAttribute(metrics, "queueDiskBytesFree"));
obj["queue_disk_available_bytes"] = parseInt64(extractAttribute(metrics, "queueDiskBytesAvailable"));
obj["queue_disk_total_bytes"] = parseInt64(extractAttribute(metrics, "queueDiskBytesTotal"));
obj["input_bytes"] = parseCounter(extractAttribute(metrics, "bytesInput"));
obj["durable_bytes"] = parseCounter(extractAttribute(metrics, "bytesDurable"));
obj["data_version"] = parseInt64(extractAttribute(metrics, "version"));
} catch (Error& e) {
if(e.code() != error_code_attribute_not_found)
throw e;
}
return roles.insert( make_pair(iface.address(), obj ))->second;
}
template <class InterfaceType>
StatusObject& addRole(std::string const& role, InterfaceType& iface) {
return addRole(iface.address(), role, iface.id());
}
StatusArray getStatusForAddress( NetworkAddress a ) {
StatusArray v;
auto it = roles.lower_bound(a);
while (it != roles.end() && it->first == a) {
v.push_back(it->second);
++it;
}
return v;
}
};
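// Builds the "processes" section of status: one entry per worker process, combining process and machine metrics,
// program start information, role details, messages (errors, issues, lagging storage servers), and exclusion state.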
ACTOR static Future<StatusObject> processStatusFetcher(
Reference<AsyncVar<struct ServerDBInfo>> db,
std::vector<std::pair<WorkerInterface, ProcessClass>> workers,
WorkerEvents pMetrics,
WorkerEvents mMetrics,
WorkerEvents errors,
WorkerEvents traceFileOpenErrors,
WorkerEvents programStarts,
std::map<std::string, StatusObject> processIssues,
vector<std::pair<StorageServerInterface, std::string>> storageServers,
vector<std::pair<TLogInterface, std::string>> tLogs,
Database cx,
Optional<DatabaseConfiguration> configuration,
std::set<std::string> *incomplete_reasons) {
// Array to hold one entry for each process
state StatusObject processMap;
state double metric;
// construct a map from a process address to a status object containing a trace file open error
// this is later added to the messages subsection
state std::map<std::string, StatusObject> tracefileOpenErrorMap;
state WorkerEvents::iterator traceFileErrorsItr;
for(traceFileErrorsItr = traceFileOpenErrors.begin(); traceFileErrorsItr != traceFileOpenErrors.end(); ++traceFileErrorsItr) {
Void _ = wait(yield());
if (traceFileErrorsItr->second.size()){
try {
// Have event string, parse it and turn it into a message object describing the trace file opening error
std::string event = traceFileErrorsItr->second;
std::string fileName = extractAttribute(event, "Filename");
StatusObject msgObj = makeMessage("file_open_error", format("Could not open file '%s' (%s).", fileName.c_str(), extractAttribute(event, "Error").c_str()).c_str());
msgObj["file_name"] = fileName;
// Map the address of the worker to the error message object
tracefileOpenErrorMap[traceFileErrorsItr->first.toString()] = msgObj;
}
catch(Error &e) {
incomplete_reasons->insert("file_open_error details could not be retrieved");
}
}
}
state std::map<Optional<Standalone<StringRef>>, MachineMemoryInfo> machineMemoryUsage;
state std::vector<std::pair<WorkerInterface, ProcessClass>>::iterator workerItr;
for(workerItr = workers.begin(); workerItr != workers.end(); ++workerItr) {
Void _ = wait(yield());
state std::map<Optional<Standalone<StringRef>>, MachineMemoryInfo>::iterator memInfo = machineMemoryUsage.insert(std::make_pair(workerItr->first.locality.machineId(), MachineMemoryInfo())).first;
try {
ASSERT(pMetrics.count(workerItr->first.address()));
std::string processMetrics = pMetrics[workerItr->first.address()];
if(memInfo->second.valid()) {
if(processMetrics.size() > 0) {
memInfo->second.memoryUsage += parseDouble(extractAttribute(processMetrics, "Memory"));
++memInfo->second.numProcesses;
}
else
memInfo->second.invalidate();
}
}
catch(Error &e) {
memInfo->second.invalidate();
}
}
state RolesInfo roles;
roles.addRole("master", db->get().master);
roles.addRole("cluster_controller", db->get().clusterInterface.clientInterface);
state Reference<ProxyInfo> proxies = cx->getMasterProxies();
if (proxies) {
state int proxyIndex;
for(proxyIndex = 0; proxyIndex < proxies->size(); proxyIndex++) {
roles.addRole( "proxy", proxies->getInterface(proxyIndex) );
Void _ = wait(yield());
}
}
state std::vector<std::pair<TLogInterface, std::string>>::iterator log;
state Version maxTLogVersion = 0;
for(log = tLogs.begin(); log != tLogs.end(); ++log) {
StatusObject const& roleStatus = roles.addRole( "log", log->first, log->second );
if(roleStatus.count("data_version") > 0) {
maxTLogVersion = std::max(maxTLogVersion, roleStatus.at("data_version").get_int64());
}
Void _ = wait(yield());
}
state std::vector<std::pair<StorageServerInterface, std::string>>::iterator ss;
state std::map<NetworkAddress, int64_t> ssLag;
for(ss = storageServers.begin(); ss != storageServers.end(); ++ss) {
StatusObject const& roleStatus = roles.addRole( "storage", ss->first, ss->second, maxTLogVersion );
if(roleStatus.count("data_version_lag") > 0) {
ssLag[ss->first.address()] = roleStatus.at("data_version_lag").get_int64();
}
Void _ = wait(yield());
}
state std::vector<ResolverInterface>::const_iterator res;
state std::vector<ResolverInterface> resolvers = db->get().resolvers;
for(res = resolvers.begin(); res != resolvers.end(); ++res) {
roles.addRole( "resolver", *res );
Void _ = wait(yield());
}
for(workerItr = workers.begin(); workerItr != workers.end(); ++workerItr) {
Void _ = wait(yield());
state StatusObject statusObj;
try {
ASSERT(pMetrics.count(workerItr->first.address()));
processMap[printable(workerItr->first.locality.processId())] = StatusObject();
NetworkAddress address = workerItr->first.address();
std::string event = pMetrics[workerItr->first.address()];
statusObj["address"] = address.toString();
StatusObject memoryObj;
if (event.size() > 0) {
std::string zoneID = extractAttribute(event, "ZoneID");
statusObj["fault_domain"] = zoneID;
std::string MachineID = extractAttribute(event, "MachineID");
statusObj["machine_id"] = MachineID;
statusObj["locality"] = getLocalityInfo(workerItr->first.locality);
statusObj["uptime_seconds"] = parseDouble(extractAttribute(event, "UptimeSeconds"));
metric = parseDouble(extractAttribute(event, "CPUSeconds"));
double cpu_seconds = metric;
// rates are calculated over the last elapsed seconds
metric = parseDouble(extractAttribute(event, "Elapsed"));
double elapsed = metric;
metric = parseDouble(extractAttribute(event, "DiskIdleSeconds"));
double diskIdleSeconds = metric;
metric = parseDouble(extractAttribute(event, "DiskReads"));
double diskReads = metric;
metric = parseDouble(extractAttribute(event, "DiskWrites"));
double diskWrites = metric;
uint64_t diskReadsCount = parseInt64(extractAttribute(event, "DiskReadsCount"));
uint64_t diskWritesCount = parseInt64(extractAttribute(event, "DiskWritesCount"));
metric = parseDouble(extractAttribute(event, "DiskWriteSectors"));
double diskWriteSectors = metric;
metric = parseDouble(extractAttribute(event, "DiskReadSectors"));
double diskReadSectors = metric;
StatusObject diskObj;
if (elapsed > 0){
StatusObject cpuObj;
cpuObj["usage_cores"] = std::max(0.0, cpu_seconds / elapsed);
statusObj["cpu"] = cpuObj;
diskObj["busy"] = std::max(0.0, std::min((elapsed - diskIdleSeconds) / elapsed, 1.0));
StatusObject readsObj;
readsObj["counter"] = diskReadsCount;
if (elapsed > 0)
readsObj["hz"] = diskReads / elapsed;
readsObj["sectors"] = diskReadSectors;
StatusObject writesObj;
writesObj["counter"] = diskWritesCount;
if (elapsed > 0)
writesObj["hz"] = diskWrites / elapsed;
writesObj["sectors"] = diskWriteSectors;
diskObj["reads"] = readsObj;
diskObj["writes"] = writesObj;
}
diskObj["total_bytes"] = parseInt64(extractAttribute(event, "DiskTotalBytes"));
diskObj["free_bytes"] = parseInt64(extractAttribute(event, "DiskFreeBytes"));
statusObj["disk"] = diskObj;
StatusObject networkObj;
networkObj["current_connections"] = parseInt64(extractAttribute(event, "CurrentConnections"));
StatusObject connections_established;
connections_established["hz"] = parseDouble(extractAttribute(event, "ConnectionsEstablished"));
networkObj["connections_established"] = connections_established;
StatusObject connections_closed;
connections_closed["hz"] = parseDouble(extractAttribute(event, "ConnectionsClosed"));
networkObj["connections_closed"] = connections_closed;
StatusObject connection_errors;
connection_errors["hz"] = parseDouble(extractAttribute(event, "ConnectionErrors"));
networkObj["connection_errors"] = connection_errors;
metric = parseDouble(extractAttribute(event, "MbpsSent"));
StatusObject megabits_sent;
megabits_sent["hz"] = metric;
networkObj["megabits_sent"] = megabits_sent;
metric = parseDouble(extractAttribute(event, "MbpsReceived"));
StatusObject megabits_received;
megabits_received["hz"] = metric;
networkObj["megabits_received"] = megabits_received;
statusObj["network"] = networkObj;
metric = parseDouble(extractAttribute(event, "Memory"));
memoryObj["used_bytes"] = metric;
}
if (programStarts.count(address)) {
auto const& psxml = programStarts.at(address);
int64_t memLimit = parseInt64(extractAttribute(psxml, "MemoryLimit"));
memoryObj["limit_bytes"] = memLimit;
}
// if this process address is in the machine metrics
if (mMetrics.count(address) && mMetrics[address].size()){
double availableMemory;
availableMemory = parseDouble(extractAttribute(mMetrics[address], "AvailableMemory"));
auto machineMemInfo = machineMemoryUsage[workerItr->first.locality.machineId()];
if (machineMemInfo.valid()) {
ASSERT(machineMemInfo.numProcesses > 0);
int64_t memory = (availableMemory + machineMemInfo.memoryUsage) / machineMemInfo.numProcesses;
memoryObj["available_bytes"] = std::max<int64_t>(memory, 0);
}
}
statusObj["memory"] = memoryObj;
StatusArray messages;
if (errors.count(address) && errors[address].size())
// returns status object with type and time of error
messages.push_back(getError(errors.at(address)));
// string of address used so that other fields of a NetworkAddress are not compared
std::string strAddress = address.toString();
// If this process has a process issue, identified by strAddress, then add it to messages array
if (processIssues.count(strAddress)){
messages.push_back(processIssues[strAddress]);
}
// If this process had a trace file open error, identified by strAddress, then add it to messages array
if (tracefileOpenErrorMap.count(strAddress)){
messages.push_back(tracefileOpenErrorMap[strAddress]);
}
if(ssLag[address] > 60 * SERVER_KNOBS->VERSIONS_PER_SECOND) {
messages.push_back(makeMessage("storage_server_lagging", format("Storage server lagging by %ld seconds.", ssLag[address] / SERVER_KNOBS->VERSIONS_PER_SECOND).c_str()));
}
// Store the message array into the status object that represents the worker process
statusObj["messages"] = messages;
// Get roles for the worker's address as an array of objects
statusObj["roles"] = roles.getStatusForAddress(address);
if (programStarts.count(address)) {
auto const& psxml = programStarts.at(address);
std::string version;
if (tryExtractAttribute(psxml, LiteralStringRef("Version"), version))
statusObj["version"] = version;
std::string commandLine;
if (tryExtractAttribute(psxml, LiteralStringRef("CommandLine"), commandLine))
statusObj["command_line"] = commandLine;
}
if (configuration.present()){
statusObj["excluded"] = configuration.get().isExcludedServer(address);
}
statusObj["class_type"] = workerItr->second.toString();
statusObj["class_source"] = workerItr->second.sourceString();
}
catch (Error& e){
// Something strange occurred, process list is incomplete but what was built so far, if anything, will be returned.
incomplete_reasons->insert("Cannot retrieve all process status information.");
}
processMap[printable(workerItr->first.locality.processId())] = statusObj;
}
return processMap;
}
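// Builds the "clients" section of status: the number of connected clients and, for each distinct client version,
// the protocol and source versions along with the addresses reporting it.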
static StatusObject clientStatusFetcher(ClientVersionMap clientVersionMap) {
StatusObject clientStatus;
clientStatus["count"] = (int64_t)clientVersionMap.size();
std::map<ClientVersionRef, std::set<NetworkAddress>> clientVersions;
for(auto client : clientVersionMap) {
for(auto ver : client.second) {
clientVersions[ver].insert(client.first);
}
}
StatusArray versionsArray = StatusArray();
for(auto cv : clientVersions) {
StatusObject ver;
ver["count"] = (int64_t)cv.second.size();
ver["client_version"] = cv.first.clientVersion.toString();
ver["protocol_version"] = cv.first.protocolVersion.toString();
ver["source_version"] = cv.first.sourceVersion.toString();
StatusArray clients = StatusArray();
for(auto client : cv.second) {
clients.push_back(client.toString());
}
ver["clients"] = clients;
versionsArray.push_back(ver);
}
if(versionsArray.size() > 0) {
clientStatus["supported_versions"] = versionsArray;
}
return clientStatus;
}
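// Reads the master's MasterRecoveryState event and converts its status code into a recovery status message,
// adding recruitment requirements while transaction servers are being recruited.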
ACTOR static Future<StatusObject> recoveryStateStatusFetcher(std::pair<WorkerInterface, ProcessClass> mWorker, std::string dbName, int workerCount, std::set<std::string> *incomplete_reasons) {
state StatusObject message;
try {
Standalone<StringRef> md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest(StringRef(dbName+"/MasterRecoveryState") ) ), 1.0) );
state int mStatusCode = parseInt( extractAttribute(md, LiteralStringRef("StatusCode")) );
if (mStatusCode < 0 || mStatusCode >= RecoveryStatus::END)
throw attribute_not_found();
message = makeMessage(RecoveryStatus::names[mStatusCode], RecoveryStatus::descriptions[mStatusCode]);
// Add additional metadata for certain statuses
if (mStatusCode == RecoveryStatus::recruiting_transaction_servers) {
int requiredLogs = atoi( extractAttribute(md, LiteralStringRef("RequiredTLogs")).c_str() );
int requiredProxies = atoi( extractAttribute(md, LiteralStringRef("RequiredProxies")).c_str() );
int requiredResolvers = atoi( extractAttribute(md, LiteralStringRef("RequiredResolvers")).c_str() );
//int requiredProcesses = std::max(requiredLogs, std::max(requiredResolvers, requiredProxies));
//int requiredMachines = std::max(requiredLogs, 1);
message["required_logs"] = requiredLogs;
message["required_proxies"] = requiredProxies;
message["required_resolvers"] = requiredResolvers;
}
// TODO: time_in_recovery: 0.5
// time_in_state: 0.1
} catch (Error &e){
if (e.code() == error_code_actor_cancelled)
throw;
}
// If the recovery status name is not known, status is incomplete
if (!message.count("name"))
incomplete_reasons->insert("Recovery Status unavailable.");
return message;
}
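// Latency probes. Each probe measures wall-clock time (timer_monotonic) for a basic operation, retrying on
// recoverable errors: doGrvProbe times acquiring a read version, doReadProbe times a single read, and
// doCommitProbe times committing a self-conflicting transaction.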
ACTOR static Future<double> doGrvProbe(Transaction *tr, Optional<FDBTransactionOptions::Option> priority = Optional<FDBTransactionOptions::Option>()) {
state double start = timer_monotonic();
loop {
try {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
if(priority.present()) {
tr->setOption(priority.get());
}
Version _ = wait(tr->getReadVersion());
return timer_monotonic() - start;
}
catch(Error &e) {
Void _ = wait(tr->onError(e));
}
}
}
ACTOR static Future<double> doReadProbe(Future<double> grvProbe, Transaction *tr) {
ErrorOr<double> grv = wait(errorOr(grvProbe));
if(grv.isError()) {
throw grv.getError();
}
state double start = timer_monotonic();
loop {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
try {
Optional<Standalone<StringRef> > _ = wait(tr->get(LiteralStringRef("\xff/StatusJsonTestKey62793")));
return timer_monotonic() - start;
}
catch(Error &e) {
Void _ = wait(tr->onError(e));
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
}
}
}
ACTOR static Future<double> doCommitProbe(Future<double> grvProbe, Transaction *sourceTr, Transaction *tr) {
ErrorOr<double> grv = wait(errorOr(grvProbe));
if(grv.isError()) {
throw grv.getError();
}
ASSERT(sourceTr->getReadVersion().isReady());
tr->setVersion(sourceTr->getReadVersion().get());
state double start = timer_monotonic();
loop {
try {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr->makeSelfConflicting();
Void _ = wait(tr->commit());
return timer_monotonic() - start;
}
catch(Error &e) {
Void _ = wait(tr->onError(e));
}
}
}
ACTOR static Future<Void> doProbe(Future<double> probe, int timeoutSeconds, const char* prefix, const char* description, StatusObject *probeObj, StatusArray *messages, std::set<std::string> *incomplete_reasons) {
choose {
when(ErrorOr<double> result = wait(errorOr(probe))) {
if(result.isError()) {
incomplete_reasons->insert(format("Unable to retrieve latency probe information (%s: %s).", description, result.getError().what()));
}
else {
(*probeObj)[format("%s_seconds", prefix).c_str()] = result.get();
}
}
when(Void _ = wait(delay(timeoutSeconds))) {
messages->push_back(makeMessage(format("%s_probe_timeout", prefix).c_str(), format("Unable to %s after %d seconds.", description, timeoutSeconds).c_str()));
}
}
return Void();
}
ACTOR static Future<StatusObject> latencyProbeFetcher(Database cx, StatusArray *messages, std::set<std::string> *incomplete_reasons) {
state Transaction trImmediate(cx);
state Transaction trDefault(cx);
state Transaction trBatch(cx);
state Transaction trWrite(cx);
state StatusObject statusObj;
try {
Future<double> immediateGrvProbe = doGrvProbe(&trImmediate, FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
Future<double> defaultGrvProbe = doGrvProbe(&trDefault);
Future<double> batchGrvProbe = doGrvProbe(&trBatch, FDBTransactionOptions::PRIORITY_BATCH);
Future<double> readProbe = doReadProbe(immediateGrvProbe, &trImmediate);
Future<double> commitProbe = doCommitProbe(immediateGrvProbe, &trImmediate, &trWrite);
int timeoutSeconds = 5;
std::vector<Future<Void>> probes;
probes.push_back(doProbe(immediateGrvProbe, timeoutSeconds, "immediate_priority_transaction_start", "start immediate priority transaction", &statusObj, messages, incomplete_reasons));
probes.push_back(doProbe(defaultGrvProbe, timeoutSeconds, "transaction_start", "start default priority transaction", &statusObj, messages, incomplete_reasons));
probes.push_back(doProbe(batchGrvProbe, timeoutSeconds, "batch_priority_transaction_start", "start batch priority transaction", &statusObj, messages, incomplete_reasons));
probes.push_back(doProbe(readProbe, timeoutSeconds, "read", "read", &statusObj, messages, incomplete_reasons));
probes.push_back(doProbe(commitProbe, timeoutSeconds, "commit", "commit", &statusObj, messages, incomplete_reasons));
Void _ = wait(waitForAll(probes));
}
catch (Error &e) {
incomplete_reasons->insert(format("Unable to retrieve latency probe information (%s).", e.what()));
}
return statusObj;
}
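// Reads the database configuration from the configKeys range, giving up after 5 seconds and reporting an
// unreadable_configuration message on timeout.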
ACTOR static Future<Optional<DatabaseConfiguration>> loadConfiguration(Database cx, StatusArray *messages, std::set<std::string> *status_incomplete_reasons){
state Optional<DatabaseConfiguration> result;
state Transaction tr(cx);
state Future<Void> getConfTimeout = delay(5.0);
loop{
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY);
try {
choose{
when(Standalone<RangeResultRef> res = wait(tr.getRange(configKeys, SERVER_KNOBS->CONFIGURATION_ROWS_TO_FETCH))) {
DatabaseConfiguration configuration;
if (res.size() == SERVER_KNOBS->CONFIGURATION_ROWS_TO_FETCH) {
status_incomplete_reasons->insert("Too many configuration parameters set.");
}
else {
for (int i = 0; i < res.size(); i++) {
configuration.set(res[i].key, res[i].value);
}
}
result = configuration;
}
when(Void _ = wait(getConfTimeout)) {
messages->push_back(makeMessage("unreadable_configuration", "Unable to read database configuration."));
}
}
break;
}
catch (Error &e) {
Void _ = wait(tr.onError(e));
}
}
return result;
}
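// Builds the "configuration" section of status from the loaded DatabaseConfiguration and the coordinator count.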
static StatusObject configurationFetcher(Optional<DatabaseConfiguration> conf, ServerCoordinators coordinators, std::set<std::string> *incomplete_reasons) {
StatusObject statusObj;
try {
StatusArray coordinatorLeaderServersArr;
vector< ClientLeaderRegInterface > coordinatorLeaderServers = coordinators.clientLeaderServers;
int count = coordinatorLeaderServers.size();
statusObj["coordinators_count"] = count;
if(conf.present()) {
DatabaseConfiguration configuration = conf.get();
std::map<std::string, std::string> configMap = configuration.toMap();
for (auto it = configMap.begin(); it != configMap.end(); it++) {
if (it->first == "redundancy_mode")
{
StatusObject redundancyStatusObj;
redundancyStatusObj["factor"] = it->second;
statusObj["redundancy"] = redundancyStatusObj;
}
else {
statusObj[it->first] = it->second;
}
}
StatusArray excludedServersArr;
std::set<AddressExclusion> excludedServers = configuration.getExcludedServers();
for (std::set<AddressExclusion>::iterator it = excludedServers.begin(); it != excludedServers.end(); it++) {
StatusObject statusObj;
statusObj["address"] = it->toString();
excludedServersArr.push_back(statusObj);
}
statusObj["excluded_servers"] = excludedServersArr;
if (configuration.masterProxyCount != -1)
statusObj["proxies"] = configuration.getDesiredProxies();
else if (configuration.autoMasterProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_PROXIES)
statusObj["auto_proxies"] = configuration.autoMasterProxyCount;
if (configuration.resolverCount != -1)
statusObj["resolvers"] = configuration.getDesiredResolvers();
else if (configuration.autoResolverCount != CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS)
statusObj["auto_resolvers"] = configuration.autoResolverCount;
if (configuration.desiredTLogCount != -1)
statusObj["logs"] = configuration.getDesiredLogs();
else if (configuration.autoDesiredTLogCount != CLIENT_KNOBS->DEFAULT_AUTO_LOGS)
statusObj["auto_logs"] = configuration.autoDesiredTLogCount;
if(configuration.storagePolicy) {
statusObj["storage_policy"] = configuration.storagePolicy->info();
}
if(configuration.tLogPolicy) {
statusObj["tlog_policy"] = configuration.tLogPolicy->info();
}
}
}
catch (Error &e){
incomplete_reasons->insert("Could not retrieve all configuration status information.");
}
return statusObj;
}
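// Builds the "data" section of status from the master worker's DDTrackerStarting, DDTrackerStats and MovingData
// events, including data movement totals and the current data state (missing_data, healing, healthy, ...).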
ACTOR static Future<StatusObject> dataStatusFetcher(std::pair<WorkerInterface, ProcessClass> mWorker, std::string dbName, int *minReplicasRemaining) {
state StatusObject stateSectionObj;
state StatusObject statusObjData;
try {
std::vector<Future<Standalone<StringRef>>> futures;
// TODO: Should this be serial?
futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStarting"))), 1.0));
futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStats"))), 1.0));
std::vector<Standalone<StringRef>> dataInfo = wait(getAll(futures));
Standalone<StringRef> startingStats = dataInfo[0];
state Standalone<StringRef> dataStats = dataInfo[1];
if (startingStats.size() && extractAttribute(startingStats, LiteralStringRef("State")) != "Active") {
stateSectionObj["name"] = "initializing";
stateSectionObj["description"] = "(Re)initializing automatic data distribution";
}
else {
state Standalone<StringRef> md = wait(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0));
// If we have a MovingData message, parse it.
if (md.size())
{
int64_t partitionsInQueue = parseInt64(extractAttribute(md, LiteralStringRef("InQueue")));
int64_t partitionsInFlight = parseInt64(extractAttribute(md, LiteralStringRef("InFlight")));
int64_t averagePartitionSize = parseInt64(extractAttribute(md, LiteralStringRef("AverageShardSize")));
int64_t totalBytesWritten = parseInt64(extractAttribute(md, LiteralStringRef("BytesWritten")));
int highestPriority = parseInt(extractAttribute(md, LiteralStringRef("HighestPriority")));
if( averagePartitionSize >= 0 ) {
StatusObject moving_data;
moving_data["in_queue_bytes"] = partitionsInQueue * averagePartitionSize;
moving_data["in_flight_bytes"] = partitionsInFlight * averagePartitionSize;
moving_data["total_written_bytes"] = totalBytesWritten;
// TODO: moving_data["rate_bytes"] = makeCounter(hz, c, r);
statusObjData["moving_data"] = moving_data;
statusObjData["average_partition_size_bytes"] = averagePartitionSize;
}
if (highestPriority >= PRIORITY_TEAM_0_LEFT) {
stateSectionObj["healthy"] = false;
stateSectionObj["name"] = "missing_data";
stateSectionObj["description"] = "No replicas remain of some data";
stateSectionObj["min_replicas_remaining"] = 0;
*minReplicasRemaining = 0;
}
else if (highestPriority >= PRIORITY_TEAM_1_LEFT) {
stateSectionObj["healthy"] = false;
stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Only one replica remains of some data";
stateSectionObj["min_replicas_remaining"] = 1;
*minReplicasRemaining = 1;
}
else if (highestPriority >= PRIORITY_TEAM_2_LEFT) {
stateSectionObj["healthy"] = false;
stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Only two replicas remain of some data";
stateSectionObj["min_replicas_remaining"] = 2;
*minReplicasRemaining = 2;
}
else if (highestPriority >= PRIORITY_TEAM_UNHEALTHY) {
stateSectionObj["healthy"] = false;
stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Restoring replication factor";
}
else if (highestPriority >= PRIORITY_MERGE_SHARD) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_repartitioning";
stateSectionObj["description"] = "Repartitioning.";
}
else if (highestPriority >= PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_removing_server";
stateSectionObj["description"] = "Removing storage server";
}
else if (highestPriority >= PRIORITY_REBALANCE_SHARD) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_rebalancing";
stateSectionObj["description"] = "Rebalancing";
}
else if (highestPriority >= 0) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy";
}
}
if (dataStats.size())
{
int64_t totalDBBytes = parseInt64(extractAttribute(dataStats, LiteralStringRef("TotalSizeBytes")));
statusObjData["total_kv_size_bytes"] = totalDBBytes;
int shards = parseInt(extractAttribute(dataStats, LiteralStringRef("Shards")));
statusObjData["partitions_count"] = shards;
}
}
}
catch (Error &e) {
if (e.code() == error_code_actor_cancelled)
throw;
// The most likely reason to be here is a timeout; either way we have no idea whether the data state is healthy or not
// from the "cluster" perspective - from the client perspective it is not, but that is indicated elsewhere.
}
if (!stateSectionObj.empty())
statusObjData["state"] = stateSectionObj;
return statusObjData;
}
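// Hash specialization so NetworkAddress can be used as a key in the std::unordered_maps below.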
namespace std
{
template <>
struct hash<NetworkAddress>
{
size_t operator()(const NetworkAddress& na) const
{
return (na.ip << 16) + na.port;
}
};
}
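// Fetches, for each server, the latest metrics event named "<server id><suffix>" (e.g. ".../StorageMetrics")
// from the worker hosting that server.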
ACTOR template <class iface>
static Future<vector<std::pair<iface, std::string>>> getServerMetrics(vector<iface> servers, std::unordered_map<NetworkAddress, WorkerInterface> address_workers, std::string suffix) {
state vector<Future<Optional<std::string>>> futures;
for (auto s : servers) {
futures.push_back(latestEventOnWorker(address_workers[s.address()], s.id().toString() + suffix));
}
Void _ = wait(waitForAll(futures));
vector<std::pair<iface, std::string>> results;
for (int i = 0; i < servers.size(); i++) {
results.push_back(std::make_pair(servers[i], futures[i].get().present() ? futures[i].get().get() : ""));
}
return results;
}
ACTOR static Future<vector<std::pair<StorageServerInterface, std::string>>> getStorageServersAndMetrics(Database cx, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
vector<StorageServerInterface> servers = wait(timeoutError(getStorageServers(cx, true), 5.0));
vector<std::pair<StorageServerInterface, std::string>> results = wait(getServerMetrics(servers, address_workers, "/StorageMetrics"));
return results;
}
ACTOR static Future<vector<std::pair<TLogInterface, std::string>>> getTLogsAndMetrics(Reference<AsyncVar<struct ServerDBInfo>> db, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
vector<TLogInterface> servers = db->get().logSystemConfig.allPresentLogs();
vector<std::pair<TLogInterface, std::string>> results = wait(getServerMetrics(servers, address_workers, "/TLogMetrics"));
return results;
}
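// Returns the set of zone IDs that contain at least one non-excluded worker eligible to run a transaction log.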
static std::set<StringRef> getTLogEligibleMachines(vector<std::pair<WorkerInterface, ProcessClass>> workers, DatabaseConfiguration configuration) {
std::set<StringRef> tlogEligibleMachines;
for(auto worker : workers) {
if(worker.second.machineClassFitness(ProcessClass::TLog) < ProcessClass::NeverAssign
&& !configuration.isExcludedServer(worker.first.address()))
{
tlogEligibleMachines.insert(worker.first.locality.zoneId().get());
}
}
return tlogEligibleMachines;
}
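// Builds the "workload" section of status (plus qos and data overlay fields) from the proxies' ProxyMetrics
// events and the ratekeeper's RkUpdate event: write and transaction rates, read rate, queue sizes, free space,
// and the current transaction rate limit with the reason it is being applied.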
ACTOR static Future<StatusObject> workloadStatusFetcher(Reference<AsyncVar<struct ServerDBInfo>> db, vector<std::pair<WorkerInterface, ProcessClass>> workers, std::pair<WorkerInterface, ProcessClass> mWorker, std::string dbName, StatusObject *qos, StatusObject *data_overlay, std::set<std::string> *incomplete_reasons) {
state StatusObject statusObj;
state StatusObject operationsObj;
// Writes and conflicts
try {
vector<Future<Standalone<StringRef>>> proxyStatFutures;
std::map<NetworkAddress, std::pair<WorkerInterface, ProcessClass>> workersMap;
for (auto w : workers) {
workersMap[w.first.address()] = w;
}
for (auto &p : db->get().client.proxies) {
auto worker = getWorker(workersMap, p.address());
if (worker.present())
proxyStatFutures.push_back(timeoutError(worker.get().first.eventLogRequest.getReply(EventLogRequest(LiteralStringRef("ProxyMetrics"))), 1.0));
else
throw all_alternatives_failed(); // We need data from all proxies for this result to be trustworthy
}
vector<Standalone<StringRef>> proxyStats = wait(getAll(proxyStatFutures));
StatusObject mutations=makeCounter(), mutationBytes=makeCounter(), txnConflicts=makeCounter(), txnStartOut=makeCounter(), txnCommitOutSuccess=makeCounter();
for (auto &ps : proxyStats) {
mutations = addCounters( mutations, parseCounter(extractAttribute(ps, LiteralStringRef("mutations"))) );
mutationBytes = addCounters( mutationBytes, parseCounter(extractAttribute(ps, LiteralStringRef("mutationBytes"))) );
txnConflicts = addCounters( txnConflicts, parseCounter(extractAttribute(ps, LiteralStringRef("txnConflicts"))) );
txnStartOut = addCounters( txnStartOut, parseCounter(extractAttribute(ps, LiteralStringRef("txnStartOut"))) );
txnCommitOutSuccess = addCounters( txnCommitOutSuccess, parseCounter(extractAttribute(ps, LiteralStringRef("txnCommitOutSuccess"))) );
}
operationsObj["writes"] = mutations;
StatusObject bytesObj;
bytesObj["written"] = mutationBytes;
statusObj["bytes"] = bytesObj;
StatusObject transactions;
transactions["conflicted"] = txnConflicts;
transactions["started"] = txnStartOut;
transactions["committed"] = txnCommitOutSuccess;
statusObj["transactions"] = transactions;
}
catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;
incomplete_reasons->insert("Unknown mutations, conflicts, and transactions state.");
}
// Transactions and reads
try {
Standalone<StringRef> md = wait( timeoutError(mWorker.first.eventLogRequest.getReply( EventLogRequest(StringRef(dbName+"/RkUpdate") ) ), 1.0) );
double tpsLimit = parseDouble(extractAttribute(md, LiteralStringRef("TPSLimit")));
double transPerSec = parseDouble(extractAttribute(md, LiteralStringRef("ReleasedTPS")));
double readReplyRate = parseDouble(extractAttribute(md, LiteralStringRef("ReadReplyRate")));
int ssCount = parseInt(extractAttribute(md, LiteralStringRef("StorageServers")));
int tlogCount = parseInt(extractAttribute(md, LiteralStringRef("TLogs")));
int64_t worstFreeSpaceStorageServer = parseInt64(extractAttribute(md, LiteralStringRef("WorstFreeSpaceStorageServer")));
int64_t worstFreeSpaceTLog = parseInt64(extractAttribute(md, LiteralStringRef("WorstFreeSpaceTLog")));
int64_t worstStorageServerQueue = parseInt64(extractAttribute(md, LiteralStringRef("WorstStorageServerQueue")));
int64_t limitingStorageServerQueue = parseInt64(extractAttribute(md, LiteralStringRef("LimitingStorageServerQueue")));
int64_t worstTLogQueue = parseInt64(extractAttribute(md, LiteralStringRef("WorstTLogQueue")));
int64_t totalDiskUsageBytes = parseInt64(extractAttribute(md, LiteralStringRef("TotalDiskUsageBytes")));
int64_t worstVersionLag = parseInt64(extractAttribute(md, LiteralStringRef("WorstStorageServerVersionLag")));
int64_t limitingVersionLag = parseInt64(extractAttribute(md, LiteralStringRef("LimitingStorageServerVersionLag")));
StatusObject readsObj;
readsObj["hz"] = readReplyRate;
operationsObj["reads"] = readsObj;
(*data_overlay)["total_disk_used_bytes"] = totalDiskUsageBytes;
if(ssCount > 0) {
(*data_overlay)["least_operating_space_bytes_storage_server"] = std::max(worstFreeSpaceStorageServer, (int64_t)0);
(*qos)["worst_queue_bytes_storage_server"] = worstStorageServerQueue;
(*qos)["limiting_queue_bytes_storage_server"] = limitingStorageServerQueue;
(*qos)["worst_version_lag_storage_server"] = worstVersionLag;
(*qos)["limiting_version_lag_storage_server"] = limitingVersionLag;
}
if(tlogCount > 0) {
(*data_overlay)["least_operating_space_bytes_log_server"] = std::max(worstFreeSpaceTLog, (int64_t)0);
(*qos)["worst_queue_bytes_log_server"] = worstTLogQueue;
}
(*qos)["transactions_per_second_limit"] = tpsLimit;
(*qos)["released_transactions_per_second"] = transPerSec;
int reason = parseInt(extractAttribute(md, LiteralStringRef("Reason")));
StatusObject perfLimit;
if (transPerSec > tpsLimit * 0.8) {
// If reason is known, set qos.performance_limited_by, otherwise omit
if (reason >= 0 && reason < limitReasonEnd) {
perfLimit = makeMessage(limitReasonName[reason], limitReasonDesc[reason]);
std::string reason_server_id = extractAttribute(md, LiteralStringRef("ReasonServerID"));
if (!reason_server_id.empty())
perfLimit["reason_server_id"] = reason_server_id;
}
}
else {
perfLimit = makeMessage("workload", "The database is not being saturated by the workload.");
}
if(!perfLimit.empty()) {
perfLimit["reason_id"] = reason;
(*qos)["performance_limited_by"] = perfLimit;
}
} catch (Error &e){
if (e.code() == error_code_actor_cancelled)
throw;
incomplete_reasons->insert("Unknown read and performance state.");
}
statusObj["operations"] = operationsObj;
return statusObj;
}
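// Summarizes the previous log system generations (logSystemConfig.oldTLogs), reporting each generation's logs
// and reducing *oldLogFaultTolerance to the weakest old generation's remaining fault tolerance.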
static StatusArray oldTlogFetcher(int* oldLogFaultTolerance, Reference<AsyncVar<struct ServerDBInfo>> db, std::unordered_map<NetworkAddress, WorkerInterface> const& address_workers) {
StatusArray oldTlogsArray;
if(db->get().recoveryState == RecoveryState::FULLY_RECOVERED) {
for(auto it : db->get().logSystemConfig.oldTLogs) {
StatusObject statusObj;
int failedLogs = 0;
StatusArray logsObj;
for(auto log : it.tLogs) {
StatusObject logObj;
bool failed = !log.present() || !address_workers.count(log.interf().address());
logObj["id"] = log.id().shortString();
logObj["healthy"] = !failed;
if(log.present()) {
logObj["address"] = log.interf().address().toString();
}
logsObj.push_back(logObj);
if(failed) {
failedLogs++;
}
}
*oldLogFaultTolerance = std::min(*oldLogFaultTolerance, it.tLogReplicationFactor - 1 - it.tLogWriteAntiQuorum - failedLogs);
statusObj["logs"] = logsObj;
statusObj["log_replication_factor"] = it.tLogReplicationFactor;
statusObj["log_write_anti_quorum"] = it.tLogWriteAntiQuorum;
statusObj["log_fault_tolerance"] = it.tLogReplicationFactor - 1 - it.tLogWriteAntiQuorum - failedLogs;
oldTlogsArray.push_back(statusObj);
}
}
return oldTlogsArray;
}
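// Estimates how many machine failures the cluster can tolerate without losing data or availability, based on
// the replication configuration, the placement of coordinators across zones, and the minimum replica count
// observed by data distribution.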
static StatusObject faultToleranceStatusFetcher(DatabaseConfiguration configuration, ServerCoordinators coordinators, std::vector<std::pair<WorkerInterface, ProcessClass>>& workers, int numTLogEligibleMachines, int minReplicasRemaining) {
StatusObject statusObj;
// without losing data
int32_t maxMachineFailures = configuration.maxMachineFailuresTolerated();
int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2;
std::map<NetworkAddress, StringRef> workerZones;
for(auto& worker : workers) {
workerZones[worker.first.address()] = worker.first.locality.zoneId().orDefault(LiteralStringRef(""));
}
std::map<StringRef, int> coordinatorZoneCounts;
for(auto& coordinator : coordinators.ccf->getConnectionString().coordinators()) {
auto zone = workerZones[coordinator];
coordinatorZoneCounts[zone] += 1;
}
std::vector<std::pair<StringRef, int>> coordinatorZones(coordinatorZoneCounts.begin(), coordinatorZoneCounts.end());
std::sort(coordinatorZones.begin(), coordinatorZones.end(), [] (const std::pair<StringRef,int>& lhs, const std::pair<StringRef,int>& rhs) {
return lhs.second > rhs.second;
});
int lostCoordinators = 0;
int maxCoordinatorZoneFailures = 0;
for(auto zone : coordinatorZones) {
lostCoordinators += zone.second;
if(lostCoordinators > maxCoordinatorFailures) {
break;
}
maxCoordinatorZoneFailures += 1;
}
int machineFailuresWithoutLosingData = std::min(maxMachineFailures, maxCoordinatorZoneFailures);
if (minReplicasRemaining >= 0){
machineFailuresWithoutLosingData = std::min(machineFailuresWithoutLosingData, minReplicasRemaining - 1);
}
// TODO: also bound this by the fault tolerance of old log generations once oldLogFaultTolerance is
// passed in to this function:
//	machineFailuresWithoutLosingData = std::min(machineFailuresWithoutLosingData, oldLogFaultTolerance);
statusObj["max_machine_failures_without_losing_data"] = std::max(machineFailuresWithoutLosingData, 0);
// without losing availability
statusObj["max_machine_failures_without_losing_availability"] = std::max(std::min(numTLogEligibleMachines - configuration.minMachinesRequired(), machineFailuresWithoutLosingData), 0);
return statusObj;
}
static std::string getIssueDescription(std::string name) {
if(name == "unable_to_write_cluster_file") {
return "Unable to update cluster file.";
}
// FIXME: name and description will be the same unless the message is 'unable_to_write_cluster_file', which is currently the only possible message
return name;
}
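// Converts the per-worker issue map into status messages keyed by the reporting worker's network address.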
static std::map<std::string, StatusObject> getProcessIssuesAsMessages( ProcessIssuesMap const& _issues ) {
std::map<std::string, StatusObject> issuesMap;
try {
ProcessIssuesMap issues = _issues;
for (auto i : issues) {
StatusObject message = makeMessage(i.second.first.c_str(), getIssueDescription(i.second.first).c_str());
issuesMap[i.first.toString()] = message;
}
}
catch (Error &e) {
TraceEvent(SevError, "ErrorParsingProcessIssues").error(e);
// swallow
}
return issuesMap;
}
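// Converts client-reported issues into an array of messages, deduplicated by issue name, each listing
// the client addresses that reported it.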
static StatusArray getClientIssuesAsMessages( ProcessIssuesMap const& _issues) {
StatusArray issuesList;
try {
ProcessIssuesMap issues = _issues;
std::map<std::string, std::vector<std::string>> deduplicatedIssues;
for(auto i : issues) {
deduplicatedIssues[i.second.first].push_back(format("%s:%d", toIPString(i.first.ip).c_str(), i.first.port));
}
for (auto i : deduplicatedIssues) {
StatusObject message = makeMessage(i.first.c_str(), getIssueDescription(i.first).c_str());
StatusArray addresses;
for(auto addr : i.second) {
addresses.push_back(addr);
}
message["addresses"] = addresses;
issuesList.push_back(message);
}
}
catch (Error &e) {
TraceEvent(SevError, "ErrorParsingClientIssues").error(e);
// swallow
}
return issuesList;
}
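// Fetches the "layers" section. Keys under layerStatusMetaPrefixRange + "json/" hold the key prefixes
// of ranges containing JSON status documents written by layers; each document is parsed and merged
// into the result with JSONDoc::absorb, so layers can contribute their own subsections (including
// JSONDoc merge operators such as $sum and $expires) to cluster status.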
ACTOR Future<StatusObject> layerStatusFetcher(Database cx, StatusArray *messages, std::set<std::string> *incomplete_reasons) {
state StatusObject result;
state JSONDoc json(result);
try {
state ReadYourWritesTransaction tr(cx);
loop {
try {
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
int64_t timeout_ms = 3000;
tr.setOption(FDBTransactionOptions::TIMEOUT, StringRef((uint8_t *)&timeout_ms, sizeof(int64_t)));
std::string jsonPrefix = layerStatusMetaPrefixRange.begin.toString() + "json/";
Standalone<RangeResultRef> jsonLayers = wait(tr.getRange(KeyRangeRef(jsonPrefix, strinc(jsonPrefix)), 1000));
// TODO: Also fetch other linked subtrees of meta keys
state std::vector<Future<Standalone<RangeResultRef>>> docFutures;
state int i;
for(i = 0; i < jsonLayers.size(); ++i)
docFutures.push_back(tr.getRange(KeyRangeRef(jsonLayers[i].value, strinc(jsonLayers[i].value)), 1000));
result.clear();
JSONDoc::expires_reference_version = (uint64_t)tr.getReadVersion().get();
for(i = 0; i < docFutures.size(); ++i) {
state Standalone<RangeResultRef> docs = wait(docFutures[i]);
state int j;
for(j = 0; j < docs.size(); ++j) {
state json_spirit::mValue doc;
try {
json_spirit::read_string(docs[j].value.toString(), doc);
Void _ = wait(yield());
json.absorb(doc.get_obj());
Void _ = wait(yield());
} catch(Error &e) {
TraceEvent(SevWarn, "LayerStatusBadJSON").detail("Key", printable(docs[j].key));
}
}
}
json.create("_valid") = true;
break;
} catch(Error &e) {
Void _ = wait(tr.onError(e));
}
}
} catch(Error &e) {
TraceEvent(SevWarn, "LayerStatusError").error(e);
incomplete_reasons->insert(format("Unable to retrieve layer status (%s).", e.what()));
json.create("_error") = format("Unable to retrieve layer status (%s).", e.what());
json.create("_valid") = false;
}
json.cleanOps();
return result;
}
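// Reports whether the database is locked by attempting to get a read version over a connection that
// is not lock-aware: success means unlocked, error_code_database_locked means locked, and a timeout
// is recorded as an incomplete-status reason instead of a database_locked field.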
ACTOR Future<StatusObject> lockedStatusFetcher(Reference<AsyncVar<struct ServerDBInfo>> db, StatusArray *messages, std::set<std::string> *incomplete_reasons) {
state StatusObject statusObj;
state Database cx = openDBOnServer(db, TaskDefaultEndpoint, true, false); // Open a new database connection that isn't lock-aware
state Transaction tr(cx);
state int timeoutSeconds = 5;
state Future<Void> getTimeout = delay(timeoutSeconds);
loop {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
choose{
when(Version f = wait(tr.getReadVersion())) {
statusObj["database_locked"] = false;
}
when(Void _ = wait(getTimeout)) {
incomplete_reasons->insert(format("Unable to determine if database is locked after %d seconds.", timeoutSeconds));
}
}
break;
}
catch (Error &e) {
if (e.code() == error_code_database_locked) {
statusObj["database_locked"] = true;
break;
}
else {
try {
Void _ = wait(tr.onError(e));
}
catch (Error &e) {
incomplete_reasons->insert(format("Unable to determine if database is locked (%s).", e.what()));
break;
}
}
}
}
return statusObj;
}
// constructs the cluster section of the json status output
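// Gathers trace events from all workers, then assembles the major subsections (machines, processes,
// data, workload, qos, layers, fault_tolerance, configuration, recovery_state, clients, messages),
// largely in parallel, recording individual failures as "messages" entries or status_incomplete
// reasons where possible.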
ACTOR Future<StatusReply> clusterGetStatus(
Reference<AsyncVar<struct ServerDBInfo>> db,
Database cx,
vector<std::pair<WorkerInterface, ProcessClass>> workers,
ProcessIssuesMap workerIssues,
ProcessIssuesMap clientIssues,
ClientVersionMap clientVersionMap,
ServerCoordinators coordinators,
std::vector<NetworkAddress> incompatibleConnections )
{
// since we no longer offer multi-database support, all databases must be named DB
state std::string dbName = "DB";
// Check if master worker is present
state StatusArray messages;
state std::set<std::string> status_incomplete_reasons;
state std::pair<WorkerInterface, ProcessClass> mWorker;
try {
// Get the master Worker interface
Optional<std::pair<WorkerInterface, ProcessClass>> _mWorker = getWorker( workers, db->get().master.address() );
if (_mWorker.present()) {
mWorker = _mWorker.get();
} else {
messages.push_back(makeMessage("unreachable_master_worker", "Unable to locate the master worker."));
}
// Get latest events for various event types from ALL workers
// WorkerEvents is a map of worker's NetworkAddress to its event string
// The pair represents worker responses and a set of worker NetworkAddress strings which did not respond
std::vector< Future< Optional <std::pair<WorkerEvents, std::set<std::string>>> > > futures;
futures.push_back(latestEventOnWorkers(workers, "MachineMetrics"));
futures.push_back(latestEventOnWorkers(workers, "ProcessMetrics"));
futures.push_back(latestErrorOnWorkers(workers));
futures.push_back(latestEventOnWorkers(workers, "TraceFileOpenError"));
futures.push_back(latestEventOnWorkers(workers, "ProgramStart"));
// Wait for all response pairs.
state std::vector< Optional <std::pair<WorkerEvents, std::set<std::string>>> > workerEventsVec = wait(getAll(futures));
// Create a unique set of all workers who were unreachable for 1 or more of the event requests above.
// Since each event request is independent and to all workers, workers can have responded to some
// event requests but still end up in the unreachable set.
std::set<std::string> mergeUnreachable;
// For each (optional) pair, if the pair is present and not empty then add the unreachable workers to the set.
for (auto pair : workerEventsVec)
{
if (pair.present() && pair.get().second.size())
mergeUnreachable.insert(pair.get().second.begin(), pair.get().second.end());
}
// We now have a unique set of workers who were in some way unreachable. If there is anything in that set, create a message
// for it and include the list of unreachable processes.
if (mergeUnreachable.size()){
StatusObject message = makeMessage("unreachable_processes", "The cluster has some unreachable processes.");
StatusArray unreachableProcs;
for (auto m : mergeUnreachable){
unreachableProcs.push_back(StatusObject({ {"address", m} }));
}
message["unreachable_processes"] = unreachableProcs;
messages.push_back(message);
}
// construct status information for cluster subsections
state StatusObject recoveryStateStatus = wait(recoveryStateStatusFetcher(mWorker, dbName, workers.size(), &status_incomplete_reasons));
// machine metrics
state WorkerEvents mMetrics = workerEventsVec[0].present() ? workerEventsVec[0].get().first : WorkerEvents();
// process metrics
state WorkerEvents pMetrics = workerEventsVec[1].present() ? workerEventsVec[1].get().first : WorkerEvents();
state WorkerEvents latestError = workerEventsVec[2].present() ? workerEventsVec[2].get().first : WorkerEvents();
state WorkerEvents traceFileOpenErrors = workerEventsVec[3].present() ? workerEventsVec[3].get().first : WorkerEvents();
state WorkerEvents programStarts = workerEventsVec[4].present() ? workerEventsVec[4].get().first : WorkerEvents();
state StatusObject statusObj;
if(db->get().recoveryCount > 0) {
statusObj["generation"] = db->get().recoveryCount;
}
state std::map<std::string, StatusObject> processIssues = getProcessIssuesAsMessages(workerIssues);
state vector<std::pair<StorageServerInterface, std::string>> storageServers;
state vector<std::pair<TLogInterface, std::string>> tLogs;
state StatusObject qos;
state StatusObject data_overlay;
statusObj["protocol_version"] = format("%llx", currentProtocolVersion);
state Optional<DatabaseConfiguration> configuration = Optional<DatabaseConfiguration>();
if(!(recoveryStateStatus.count("name") && recoveryStateStatus["name"] == RecoveryStatus::names[RecoveryStatus::configuration_missing])) {
Optional<DatabaseConfiguration> _configuration = wait(loadConfiguration(cx, &messages, &status_incomplete_reasons));
configuration = _configuration;
}
statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons);
if (configuration.present()){
// Do the latency probe by itself to avoid interference from other status activities
StatusObject latencyProbeResults = wait(latencyProbeFetcher(cx, &messages, &status_incomplete_reasons));
statusObj["database_available"] = latencyProbeResults.count("immediate_priority_transaction_start_seconds") && latencyProbeResults.count("read_seconds") && latencyProbeResults.count("commit_seconds");
if (!latencyProbeResults.empty()) {
statusObj["latency_probe"] = latencyProbeResults;
}
state int minReplicasRemaining = -1;
std::vector<Future<StatusObject>> futures2;
futures2.push_back(dataStatusFetcher(mWorker, dbName, &minReplicasRemaining));
futures2.push_back(workloadStatusFetcher(db, workers, mWorker, dbName, &qos, &data_overlay, &status_incomplete_reasons));
futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
// Start getting storage servers now (using system priority) concurrently. Using sys priority because having storage servers
// in status output is important to give context to error messages in status that reference a storage server role ID.
state std::unordered_map<NetworkAddress, WorkerInterface> address_workers;
for (auto worker : workers)
address_workers[worker.first.address()] = worker.first;
state Future<ErrorOr<vector<std::pair<StorageServerInterface, std::string>>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers));
state Future<ErrorOr<vector<std::pair<TLogInterface, std::string>>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers));
state std::vector<StatusObject> workerStatuses = wait(getAll(futures2));
int oldLogFaultTolerance = 100;
if(db->get().recoveryState == RecoveryState::FULLY_RECOVERED && db->get().logSystemConfig.oldTLogs.size() > 0) {
statusObj["old_logs"] = oldTlogFetcher(&oldLogFaultTolerance, db, address_workers);
}
if(configuration.present()) {
std::set<StringRef> tlogEligibleMachines = getTLogEligibleMachines(workers, configuration.get());
statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, tlogEligibleMachines.size(), minReplicasRemaining);
}
StatusObject configObj = configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
// configObj could be empty
if (!configObj.empty())
statusObj["configuration"] = configObj;
// workloadStatusFetcher returns the workload section but also optionally writes the qos section and adds to the data_overlay object
if (!workerStatuses[1].empty())
statusObj["workload"] = workerStatuses[1];
statusObj["layers"] = workerStatuses[2];
// Add qos section if it was populated
if (!qos.empty())
statusObj["qos"] = qos;
// Merge data_overlay into data
StatusObject &clusterDataSection = workerStatuses[0];
clusterDataSection.insert(data_overlay.begin(), data_overlay.end());
// If data section not empty, add it to statusObj
if (!clusterDataSection.empty())
statusObj["data"] = clusterDataSection;
// Insert database_locked section
if(!workerStatuses[3].empty()) {
statusObj.insert(workerStatuses[3].begin(), workerStatuses[3].end());
}
// Need storage servers now for processStatusFetcher() below.
ErrorOr<vector<std::pair<StorageServerInterface, std::string>>> _storageServers = wait(storageServerFuture);
if (_storageServers.present()) {
storageServers = _storageServers.get();
}
else
messages.push_back(makeMessage("storage_servers_error", "Timed out trying to retrieve storage servers."));
// ...also tlogs
ErrorOr<vector<std::pair<TLogInterface, std::string>>> _tLogs = wait(tLogFuture);
if (_tLogs.present()) {
tLogs = _tLogs.get();
}
else
messages.push_back(makeMessage("log_servers_error", "Timed out trying to retrieve log servers."));
}
else {
// Set layers status to { _valid: false, error: "configurationMissing"}
statusObj["layers"] = json_spirit::mObject({{"_valid", false}, {"_error", "configurationMissing"}});
}
StatusObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, storageServers, tLogs, cx, configuration, &status_incomplete_reasons));
statusObj["processes"] = processStatus;
statusObj["clients"] = clientStatusFetcher(clientVersionMap);
StatusArray incompatibleConnectionsArray;
for(auto it : incompatibleConnections) {
incompatibleConnectionsArray.push_back(it.toString());
}
statusObj["incompatible_connections"] = incompatibleConnectionsArray;
if (!recoveryStateStatus.empty())
statusObj["recovery_state"] = recoveryStateStatus;
// cluster messages subsection;
StatusArray clientIssuesArr = getClientIssuesAsMessages(clientIssues);
if (clientIssuesArr.size() > 0) {
StatusObject clientIssueMessage = makeMessage("client_issues", "Some clients of this cluster have issues.");
clientIssueMessage["issues"] = clientIssuesArr;
messages.push_back(clientIssueMessage);
}
// Create the status_incomplete message if there were any reasons that the status is incomplete.
if (!status_incomplete_reasons.empty())
{
StatusObject incomplete_message = makeMessage("status_incomplete", "Unable to retrieve all status information.");
// Make a JSON array of all of the reasons in the status_incomplete_reasons set.
StatusArray reasons;
for (auto i : status_incomplete_reasons)
reasons.push_back(StatusObject({ { "description", i } }));
incomplete_message["reasons"] = reasons;
messages.push_back(incomplete_message);
}
statusObj["messages"] = messages;
int64_t clusterTime = time(0);
if (clusterTime != -1){
statusObj["cluster_controller_timestamp"] = clusterTime;
}
return statusObj;
} catch( Error&e ) {
TraceEvent(SevError, "StatusError").error(e);
throw;
}
}
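// Unit test for the JSONDoc merge semantics used when combining status documents: absorbing several
// docs resolves operator keys such as $sum, $min, $max, $and, $count_keys, $last, $latest, and
// $expires, and flags conflicting plain values with an ERROR entry.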
TEST_CASE("status/json/merging") {
StatusObject objA, objB, objC;
JSONDoc a(objA), b(objB), c(objC);
a.create("int_one") = 1;
a.create("int_unmatched") = 2;
a.create("int_total_30.$sum") = 10;
a.create("bool_true.$and") = true;
a.create("string") = "test";
a.create("subdoc.int_11") = 11;
a.create("a") = "justA";
a.create("subdoc.double_max_5.$max") = 2.0;
a.create("subdoc.double_min_2.$min") = 2.0;
a.create("subdoc.obj_count_3.$count_keys.one") = 1;
a.create("subdoc.obj_count_3.$count_keys.two") = 2;
a.create("expired.$expires") = "I should have expired.";
a.create("expired.version") = 1;
a.create("not_expired_and_merged.$expires.seven.$sum") = 1;
a.create("not_expired_and_merged.$expires.one.$min") = 3;
a.create("not_expired_and_merged.version") = 3;
b.create("int_one") = 1;
b.create("int_unmatched") = 3;
b.create("int_total_30.$sum") = 20;
b.create("bool_true.$and") = true;
b.create("string") = "test";
b.create("subdoc.int_11") = 11;
b.create("b") = "justB";
b.create("subdoc.double_max_5.$max") = 5.0;
b.create("subdoc.double_min_2.$min") = 5.0;
b.create("subdoc.obj_count_3.$count_keys.three") = 3;
b.create("expired.$expires") = "I should have also expired.";
b.create("expired.version") = 1;
b.create("not_expired_and_merged.$expires.seven.$sum") = 2;
b.create("not_expired_and_merged.$expires.one.$min") = 1;
b.create("not_expired_and_merged.version") = 3;
b.create("last_hello.$last") = "blah";
b.create("latest_obj.$latest.a") = 0;
b.create("latest_obj.$latest.b") = 0;
b.create("latest_obj.$latest.c") = 0;
b.create("latest_obj.timestamp") = 2;
b.create("latest_int_5.$latest") = 7;
b.create("latest_int_5.timestamp") = 2;
c.create("int_total_30.$sum") = 0;
c.create("not_expired.$expires") = "I am still valid";
c.create("not_expired.version") = 3;
c.create("not_expired_and_merged.$expires.seven.$sum") = 4;
c.create("not_expired_and_merged.$expires.one.$min") = 2;
c.create("not_expired_and_merged.version") = 3;
c.create("last_hello.$last") = "hello";
c.create("latest_obj.$latest.a.$max") = "a";
c.create("latest_obj.$latest.b.$min") = "b";
c.create("latest_obj.$latest.expired.$expires") = "I should not be here.";
c.create("latest_obj.$latest.expired.version") = 1;
c.create("latest_obj.$latest.not_expired.$expires") = "Still alive.";
c.create("latest_obj.$latest.not_expired.version") = 3;
c.create("latest_obj.timestamp") = 3;
b.create("latest_int_5.$latest") = 5;
b.create("latest_int_5.timestamp") = 3;
JSONDoc::expires_reference_version = 2;
a.absorb(b);
a.absorb(c);
a.cleanOps();
std::string result = json_spirit::write_string(json_spirit::mValue(objA));
std::string expected = "{\"a\":\"justA\",\"b\":\"justB\",\"bool_true\":true,\"expired\":null,\"int_one\":1,\"int_total_30\":30,\"int_unmatched\":{\"ERROR\":\"Values do not match.\",\"a\":2,\"b\":3},\"last_hello\":\"hello\",\"latest_int_5\":5,\"latest_obj\":{\"a\":\"a\",\"b\":\"b\",\"not_expired\":\"Still alive.\"},\"not_expired\":\"I am still valid\",\"not_expired_and_merged\":{\"one\":1,\"seven\":7},\"string\":\"test\",\"subdoc\":{\"double_max_5\":5,\"double_min_2\":2,\"int_11\":11,\"obj_count_3\":3}}";
if(result != expected) {
printf("ERROR: Combined doc does not match expected.\nexpected: %s\nresult: %s\n", expected.c_str(), result.c_str());
ASSERT(false);
}
return Void();
}