Merge branch 'release-6.2' into add-coordinator-to-status-roles-list

# Conflicts: # documentation/sphinx/source/release-notes.rst
2019-08-19 15:03:59 -07:00 · 2019-08-19 15:03:59 -07:00 · 2b80d836f4
parent dc534aea1a 3965a959b4
commit 2b80d836f4
10 changed files with 119 additions and 37 deletions
--- a/documentation/sphinx/source/configuration.rst
+++ b/documentation/sphinx/source/configuration.rst
@ -199,14 +199,12 @@ The ``foundationdb.conf`` file contains several sections, detailed below. Note t
    ## foundationdb.conf 
    ##
    ## Configuration file for FoundationDB server processes 
-    ## Full documentation is available in the FoundationDB Administration document.

    [fdbmonitor]
-    restart_delay = 60
    user = foundationdb
    group = foundationdb

-Contains basic configuration parameters of the ``fdbmonitor`` process. ``restart_delay`` specifies the number of seconds that ``fdbmonitor`` waits before restarting a failed process. ``user`` and ``group`` are used on Linux systems to control the privilege level of child processes.
+Contains basic configuration parameters of the ``fdbmonitor`` process. ``user`` and ``group`` are used on Linux systems to control the privilege level of child processes.

 ``[general]`` section
 -----------------------
@ -215,8 +213,41 @@ Contains basic configuration parameters of the ``fdbmonitor`` process. ``restart

    [general]
    cluster_file = /etc/foundationdb/fdb.cluster
+    restart_delay = 60
+    ## restart_backoff and restart_delay_reset_interval default to the value that is used for restart_delay
+    # initial_restart_delay = 0
+    # restart_backoff = 60.0
+    # restart_delay_reset_interval = 60
+    # delete_envvars =
+    # kill_on_configuration_change = true
+    # disable_lifecycle_logging = false

-Contains settings applicable to all processes (e.g. fdbserver, backup_agent). The main setting of interest is ``cluster_file``, which specifies the location of the cluster file. This file and the directory that contains it must be writable by all processes (i.e. by the user or group set in the [fdbmonitor] section).
+Contains settings applicable to all processes (e.g. fdbserver, backup_agent).
+
+* ``cluster_file``: Specifies the location of the cluster file. This file and the directory that contains it must be writable by all processes (i.e. by the user or group set in the ``[fdbmonitor]`` section).
+* ``delete_envvars``: A space-separated list of environment variables to remove from the environments of child processes. This can be used if the ``fdbmonitor`` process needs to be run with environment variables that are undesired in its children.
+* ``kill_on_configuration_change``: If ``true``, affected processes will be restarted whenever the configuration file changes. Defaults to ``true``.
+* ``disable_lifecycle_logging``: If ``true``, ``fdbmonitor`` will not write log events when processes start or terminate. Defaults to ``false``.
+
+The ``[general]`` section also contains some parameters to control how processes are restarted when they die. ``fdbmonitor`` uses backoff logic to prevent a process that dies repeatedly from cycling too quickly, and it also introduces up to +/-10% random jitter into the delay to avoid multiple processes all restarting simultaneously. ``fdbmonitor`` tracks separate backoff state for each process, so the restarting of one process will have no effect on the backoff behavior of another.
+
+* ``restart_delay``: The maximum number of seconds (subject to jitter) that ``fdbmonitor`` will delay before restarting a failed process.
+* ``initial_restart_delay``: The number of seconds ``fdbmonitor`` waits to restart a process the first time it dies. Defaults to 0 (i.e. the process gets restarted immediately). 
+* ``restart_backoff``: Controls how quickly ``fdbmonitor`` backs off when a process dies repeatedly. The previous delay (or 1, if the previous delay is 0) is multiplied by ``restart_backoff`` to get the next delay, maxing out at the value of ``restart_delay``. Defaults to the value of ``restart_delay``, meaning that the second and subsequent failures will all delay ``restart_delay`` between restarts.
+* ``restart_delay_reset_interval``: The number of seconds a process must be running before resetting the backoff back to the value of ``initial_restart_delay``. Defaults to the value of ``restart_delay``.
+
+As an example, let's say the following parameters have been set:
+
+.. code-block:: ini
+
+    restart_delay = 60
+    initial_restart_delay = 0
+    restart_backoff = 2.0
+    restart_delay_reset_interval = 180
+
+The progression of delays for a process that fails repeatedly would be ``0, 2, 4, 8, 16, 32, 60, 60, ...``, each subject to up to +/-10% random jitter. After the process stays alive for 180 seconds, the backoff would reset and the next failure would restart the process immediately.
+
+Using the default parameters, a process will restart immediately if it fails and then delay ``restart_delay`` seconds if it fails again within ``restart_delay`` seconds. 

 .. _foundationdb-conf-fdbserver:

--- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc
+++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc
@ -265,7 +265,23 @@
         "limiting_queue_bytes_storage_server":0,
         "worst_queue_bytes_storage_server":0,
         "limiting_version_lag_storage_server":0,
-         "worst_version_lag_storage_server":0
+         "worst_version_lag_storage_server":0,
+         "limiting_data_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         },
+         "worst_data_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         },
+         "limiting_durability_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         },
+         "worst_durability_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         }
      },
      "incompatible_connections":[
      ],
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@ -2,7 +2,7 @@
 Release Notes
 #############

-6.2.2
+6.2.3
 =====

 Performance
@ -40,6 +40,8 @@ Fixes
 * In very rare scenarios, master recovery would restart because system metadata was loaded incorrectly. `(PR #1919) <https://github.com/apple/foundationdb/pull/1919>`_.
 * Ratekeeper will aggressively throttle when unable to fetch the list of storage servers for a considerable period of time. `(PR #1858) <https://github.com/apple/foundationdb/pull/1858>`_.
 * Proxies could become overloaded when all storage servers on a team fail. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
+* The ``fileconfigure`` command in ``fdbcli`` could fail with an unknown error if the file did not contain a valid JSON object. `(PR #2017) <https://github.com/apple/foundationdb/pull/2017>`_.
+* Configuring regions would fail with an internal error if the cluster contained storage servers that didn't set a datacenter ID. `(PR #2017) <https://github.com/apple/foundationdb/pull/2017>`_.

 Status
 ------
@ -55,6 +57,8 @@ Status
 * Added ``max_protocol_clients`` to the ``supported_versions`` section, which provides a sample of connected clients which cannot connect to any higher protocol version. `(PR #1902) <https://github.com/apple/foundationdb/pull/1902>`_.
 * Clients which connect without specifying their supported versions are tracked as an ``Unknown`` version in the ``supported_versions`` section. [6.2.2] `(PR #1990) <https://github.com/apple/foundationdb/pull/1990>`_.
 * Add ``coordinator`` to the list of roles that can be reported for a process. [6.2.3] `(PR #2006) <https://github.com/apple/foundationdb/pull/2006>`_.
+* Added ``worst_durability_lag_storage_server`` and ``limiting_durability_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These report the durability lag values being used by ratekeeper to potentially limit the transaction rate. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
+* Added ``worst_data_lag_storage_server`` and ``limiting_data_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These are meant to replace ``worst_version_lag_storage_server`` and ``limiting_version_lag_storage_server``, which are now deprecated. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.

 Bindings
 --------
@ -93,6 +97,7 @@ Fixes only impacting 6.2.0+
 * Clients could crash when closing connections with incompatible servers. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
 * Do not close idle network connections with incompatible servers. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
 * In status, ``max_protocol_clients`` were incorrectly added to the ``connected_clients`` list. [6.2.2] `(PR #1990) <https://github.com/apple/foundationdb/pull/1990>`_.
+* Ratekeeper ignores the (default 5 second) MVCC window when controlling on durability lag. [6.2.3] `(PR #2012) <https://github.com/apple/foundationdb/pull/2012>`_.

 Earlier release notes
 ---------------------
--- a/fdbbackup/backup.actor.cpp
+++ b/fdbbackup/backup.actor.cpp
@ -586,7 +586,6 @@ CSimpleOpt::SOption g_rgDBAgentOptions[] = {
 #ifdef _WIN32
 	{ OPT_PARENTPID,      "--parentpid",       SO_REQ_SEP },
 #endif
-	{ OPT_TRACE_LOG_GROUP, "--loggroup",       SO_REQ_SEP },
 	{ OPT_SOURCE_CLUSTER,  "-s",               SO_REQ_SEP },
 	{ OPT_SOURCE_CLUSTER,  "--source",         SO_REQ_SEP },
 	{ OPT_DEST_CLUSTER,    "-d",               SO_REQ_SEP },
@ -826,6 +825,9 @@ static void printAgentUsage(bool devhelp) {
 		   "  --logdir PATH  Specifes the output directory for trace files. If\n"
 		   "                 unspecified, defaults to the current directory. Has\n"
 		   "                 no effect unless --log is specified.\n");
+	printf("  --loggroup LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n");
 	printf("  --trace_format FORMAT\n"
 		   "                 Select the format of the trace files. xml (the default) and json are supported.\n"
 		   "                 Has no effect unless --log is specified.\n");
@ -912,6 +914,9 @@ static void printBackupUsage(bool devhelp) {
 		   "  --logdir PATH  Specifes the output directory for trace files. If\n"
 		   "                 unspecified, defaults to the current directory. Has\n"
 		   "                 no effect unless --log is specified.\n");
+	printf("  --loggroup LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n");
 	printf("  --trace_format FORMAT\n"
 		   "                 Select the format of the trace files. xml (the default) and json are supported.\n"
 		   "                 Has no effect unless --log is specified.\n");
@ -970,6 +975,9 @@ static void printRestoreUsage(bool devhelp ) {
 		   "  --logdir PATH  Specifes the output directory for trace files. If\n"
 		   "                 unspecified, defaults to the current directory. Has\n"
 		   "                 no effect unless --log is specified.\n");
+	printf("  --loggroup LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n");
 	printf("  --trace_format FORMAT\n"
 		   "                 Select the format of the trace files. xml (the default) and json are supported.\n"
 		   "                 Has no effect unless --log is specified.\n");
@ -1015,6 +1023,9 @@ static void printDBAgentUsage(bool devhelp) {
 		   "  --logdir PATH  Specifes the output directory for trace files. If\n"
 		   "                 unspecified, defaults to the current directory. Has\n"
 		   "                 no effect unless --log is specified.\n");
+	printf("  --loggroup LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n");
 	printf("  --trace_format FORMAT\n"
 		   "                 Select the format of the trace files. xml (the default) and json are supported.\n"
 		   "                 Has no effect unless --log is specified.\n");
@ -1062,6 +1073,9 @@ static void printDBBackupUsage(bool devhelp) {
 		   "  --logdir PATH  Specifes the output directory for trace files. If\n"
 		   "                 unspecified, defaults to the current directory. Has\n"
 		   "                 no effect unless --log is specified.\n");
+	printf("  --loggroup LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n");
 	printf("  --trace_format FORMAT\n"
 		   "                 Select the format of the trace files. xml (the default) and json are supported.\n"
 		   "                 Has no effect unless --log is specified.\n");
--- a/fdbcli/fdbcli.actor.cpp
+++ b/fdbcli/fdbcli.actor.cpp
@ -1777,6 +1777,10 @@ ACTOR Future<bool> fileConfigure(Database db, std::string filePath, bool isNewDa
 		printf("ERROR: Invalid JSON\n");
 		return true;
 	}
+	if(config.type() != json_spirit::obj_type) {
+		printf("ERROR: Configuration file must contain a JSON object\n");
+		return true;
+	}
 	StatusObject configJSON = config.get_obj();

 	json_spirit::mValue schema;
--- a/fdbclient/ManagementAPI.actor.cpp
+++ b/fdbclient/ManagementAPI.actor.cpp
@ -423,11 +423,11 @@ ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std:
 						for(auto& it : newConfig.regions) {
 							newDcIds.insert(it.dcId);
 						}
-						std::set<Key> missingDcIds;
+						std::set<Optional<Key>> missingDcIds;
 						for(auto& s : serverList) {
 							auto ssi = decodeServerListValue( s.value );
 							if ( !ssi.locality.dcId().present() || !newDcIds.count(ssi.locality.dcId().get()) ) {
-								missingDcIds.insert(ssi.locality.dcId().get());
+								missingDcIds.insert(ssi.locality.dcId());
 							}
 						}
 						if(missingDcIds.size() > (oldReplicationUsesDcId ? 1 : 0)) {
--- a/fdbclient/Schemas.cpp
+++ b/fdbclient/Schemas.cpp
@ -287,7 +287,23 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
         "limiting_queue_bytes_storage_server":0,
         "worst_queue_bytes_storage_server":0,
         "limiting_version_lag_storage_server":0,
-         "worst_version_lag_storage_server":0
+         "worst_version_lag_storage_server":0,
+         "limiting_data_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         },
+         "worst_data_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         },
+         "limiting_durability_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         },
+         "worst_durability_lag_storage_server":{
+            "versions":0,
+            "seconds":0.0
+         }
      },
      "incompatible_connections":[

--- a/fdbserver/Ratekeeper.actor.cpp
+++ b/fdbserver/Ratekeeper.actor.cpp
@ -93,7 +93,6 @@ struct StorageQueueInfo {
 	Smoother verySmoothDurableVersion, smoothLatestVersion;
 	Smoother smoothFreeSpace;
 	Smoother smoothTotalSpace;
-	double localRateLimit;
 	limitReason_t limitReason;
 	StorageQueueInfo(UID id, LocalityData locality) : valid(false), id(id), locality(locality), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
 		smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
@ -147,7 +146,7 @@ struct RatekeeperLimits {
 		logTargetBytes(logTargetBytes),
 		logSpringBytes(logSpringBytes),
 		maxVersionDifference(maxVersionDifference),
-		durabilityLagTargetVersions(durabilityLagTargetVersions),
+		durabilityLagTargetVersions(durabilityLagTargetVersions + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS), // The read transaction life versions are expected to not be durable on the storage servers
 		durabilityLagLimit(std::numeric_limits<double>::infinity()),
 		lastDurabilityLag(0),
 		context(context)
@ -203,7 +202,6 @@ ACTOR Future<Void> trackStorageServerQueueInfo( RatekeeperData* self, StorageSer
 				myQueueInfo->value.valid = true;
 				myQueueInfo->value.prevReply = myQueueInfo->value.lastReply;
 				myQueueInfo->value.lastReply = reply.get();
-				myQueueInfo->value.localRateLimit = reply.get().localRateLimit;
 				if (myQueueInfo->value.prevReply.instanceID != reply.get().instanceID) {
 					myQueueInfo->value.smoothDurableBytes.reset(reply.get().bytesDurable);
 					myQueueInfo->value.verySmoothDurableBytes.reset(reply.get().bytesDurable);
@ -376,8 +374,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
 	int64_t worstStorageQueueStorageServer = 0;
 	int64_t limitingStorageQueueStorageServer = 0;
 	int64_t worstDurabilityLag = 0;
-	double worstStorageLocalLimit = 0;
-	double limitingStorageLocalLimit = 0;

 	std::multimap<double, StorageQueueInfo*> storageTpsLimitReverseIndex;
 	std::multimap<int64_t, StorageQueueInfo*> storageDurabilityLagReverseIndex;
@ -408,7 +404,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {

 		int64_t storageQueue = ss.lastReply.bytesInput - ss.smoothDurableBytes.smoothTotal();
 		worstStorageQueueStorageServer = std::max(worstStorageQueueStorageServer, storageQueue);
-		worstStorageLocalLimit = std::min(worstStorageLocalLimit, ss.localRateLimit);

 		int64_t storageDurabilityLag = ss.smoothLatestVersion.smoothTotal() - ss.verySmoothDurableVersion.smoothTotal();
 		worstDurabilityLag = std::max(worstDurabilityLag, storageDurabilityLag);
@ -485,7 +480,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
 		}

 		limitingStorageQueueStorageServer = ss->second->lastReply.bytesInput - ss->second->smoothDurableBytes.smoothTotal();
-		limitingStorageLocalLimit = ss->second->lastReply.localRateLimit;
 		limits->tpsLimit = ss->first;
 		reasonID = storageTpsLimitReverseIndex.begin()->second->id; // Although we aren't controlling based on the worst SS, we still report it as the limiting process
 		limitReason = ssReasons[reasonID];
@ -679,14 +673,12 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
 			.detail("WorstFreeSpaceTLog", worstFreeSpaceTLog)
 			.detail("WorstStorageServerQueue", worstStorageQueueStorageServer)
 			.detail("LimitingStorageServerQueue", limitingStorageQueueStorageServer)
-			.detail("WorstStorageLocalLimit", worstStorageLocalLimit)
-			.detail("LimitingStorageLocalLimit", limitingStorageLocalLimit)
 			.detail("WorstTLogQueue", worstStorageQueueTLog)
 			.detail("TotalDiskUsageBytes", totalDiskUsageBytes)
 			.detail("WorstStorageServerVersionLag", worstVersionLag)
 			.detail("LimitingStorageServerVersionLag", limitingVersionLag)
-			.detail("WorstDurabilityLag", worstDurabilityLag)
-			.detail("LimitingDurabilityLag", limitingDurabilityLag)
+			.detail("WorstStorageServerDurabilityLag", worstDurabilityLag)
+			.detail("LimitingStorageServerDurabilityLag", limitingDurabilityLag)
 			.trackLatest(name.c_str());
 	}
 }
--- a/fdbserver/Status.actor.cpp
+++ b/fdbserver/Status.actor.cpp
@ -388,6 +388,13 @@ static JsonBuilderObject machineStatusFetcher(WorkerEvents mMetrics, vector<Work
 	return machineMap;
 }

+JsonBuilderObject getLagObject(int64_t versions) {
+	JsonBuilderObject lag;
+	lag["versions"] = versions;
+	lag["seconds"] = versions / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
+	return lag;
+}
+
 struct MachineMemoryInfo {
 	double memoryUsage;
 	double numProcesses;
@ -474,17 +481,8 @@ struct RolesInfo {
 				obj["read_latency_bands"] = addLatencyBandInfo(readLatencyMetrics);
 			}

-			JsonBuilderObject dataLag;
-			dataLag["versions"] = versionLag;
-			dataLagSeconds = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
-			dataLag["seconds"] = dataLagSeconds;
-
-			JsonBuilderObject durabilityLag;
-			durabilityLag["versions"] = version - durableVersion;
-			durabilityLag["seconds"] = (version - durableVersion) / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
-
-			obj["data_lag"] = dataLag;
-			obj["durability_lag"] = durabilityLag;
+			obj["data_lag"] = getLagObject(versionLag);
+			obj["durability_lag"] = getLagObject(version - durableVersion);

 		} catch (Error& e) {
 			if(e.code() != error_code_attribute_not_found)
@ -1621,8 +1619,15 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
 			(*data_overlay)["least_operating_space_bytes_storage_server"] = std::max(worstFreeSpaceStorageServer, (int64_t)0);
 			(*qos).setKeyRawNumber("worst_queue_bytes_storage_server", ratekeeper.getValue("WorstStorageServerQueue"));
 			(*qos).setKeyRawNumber("limiting_queue_bytes_storage_server", ratekeeper.getValue("LimitingStorageServerQueue"));
+
+			// TODO: These can be removed in the next release after 6.2
 			(*qos).setKeyRawNumber("worst_version_lag_storage_server", ratekeeper.getValue("WorstStorageServerVersionLag"));
 			(*qos).setKeyRawNumber("limiting_version_lag_storage_server", ratekeeper.getValue("LimitingStorageServerVersionLag"));
+
+			(*qos)["worst_data_lag_storage_server"] = getLagObject(ratekeeper.getInt64("WorstStorageServerVersionLag"));
+			(*qos)["limiting_data_lag_storage_server"] = getLagObject(ratekeeper.getInt64("LimitingStorageServerVersionLag"));
+			(*qos)["worst_durability_lag_storage_server"] = getLagObject(ratekeeper.getInt64("WorstStorageServerDurabilityLag"));
+			(*qos)["limiting_durability_lag_storage_server"] = getLagObject(ratekeeper.getInt64("LimitingStorageServerDurabilityLag"));
 		}

 		if(tlogCount > 0) {
@ -2315,11 +2320,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
 			incompatibleConnectionsArray.push_back(it.toString());
 		}
 		statusObj["incompatible_connections"] = incompatibleConnectionsArray;
-
-		StatusObject datacenterLag;
-		datacenterLag["versions"] = datacenterVersionDifference;
-		datacenterLag["seconds"] = datacenterVersionDifference / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
-		statusObj["datacenter_lag"] = datacenterLag;
+		statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference);

 		int totalDegraded = 0;
 		for(auto& it : workers) {
--- a/fdbserver/fdbserver.actor.cpp
+++ b/fdbserver/fdbserver.actor.cpp
@ -574,6 +574,9 @@ static void printUsage( const char *name, bool devhelp ) {
 		   "                 Delete the oldest log file when the total size of all log\n"
 		   "                 files exceeds SIZE bytes. If set to 0, old log files will not\n"
 		   "                 be deleted. The default value is 100MiB.\n");
+	printf("  --loggroup LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n");
 	printf("  --trace_format FORMAT\n"
 	       "                 Select the format of the log files. xml (the default) and json\n"
 	       "                 are supported.\n");