From b51beead53f4fd0300bbbd46ca96f2143f0d28de Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 9 Jul 2020 16:58:13 -0700 Subject: [PATCH 1/2] The backport of a change in later versions didn't include some updates to the schema and a change to the name of one of the messages. --- documentation/sphinx/source/mr-status-json-schemas.rst.inc | 4 +++- documentation/sphinx/source/mr-status.rst | 2 ++ fdbclient/Schemas.cpp | 4 +++- fdbserver/Status.actor.cpp | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index 479ecfab47..6de17ebade 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -362,7 +362,9 @@ "layer_status_incomplete", "database_availability_timeout", "consistencycheck_suspendkey_fetch_timeout", - "consistencycheck_disabled" + "consistencycheck_disabled", + "primary_dc_missing", + "fetch_primary_dc_timeout" ] }, "issues":[ diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 9e11906e71..6ba4537285 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -88,6 +88,8 @@ cluster.messages unreachable_ratekeeper_worker Unab cluster.messages unreachable_processes The cluster has some unreachable processes. cluster.messages unreadable_configuration Unable to read database configuration. cluster.messages layer_status_incomplete Some or all of the layers subdocument could not be read. +cluster.messages primary_dc_missing Unable to determine primary datacenter. +cluster.messages fetch_primary_dc_timeout Fetching primary DC timed out. cluster.processes.<process>.messages file_open_error Unable to open ‘<file>’ (<os_error>). cluster.processes.<process>.messages incorrect_cluster_file_contents Cluster file contents do not match current cluster connection string.
Verify cluster file is writable and has not been overwritten externally. cluster.processes.<process>.messages io_error <error> occured in <subsystem> diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index b9c8c34d0c..0b898977db 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -387,7 +387,9 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "consistencycheck_suspendkey_fetch_timeout", "consistencycheck_disabled", "duplicate_mutation_streams", - "duplicate_mutation_fetch_timeout" + "duplicate_mutation_fetch_timeout", + "primary_dc_missing", + "fetch_primary_dc_timeout" ] }, "issues":[ diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 72ade55b7f..413087e5d9 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -2165,7 +2165,7 @@ ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray* } catch (Error& e) { if (e.code() == error_code_timed_out) { messages->push_back( - JsonString::makeMessage("fetch_primary_dc_timedout", "Fetching primary DC timed out.")); + JsonString::makeMessage("fetch_primary_dc_timeout", "Fetching primary DC timed out.")); return Optional<Value>(); } else { wait(tr.onError(e)); From 28b6281848aee0bbc550f553017efd06bab8d3bd Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Fri, 10 Jul 2020 08:52:59 -0700 Subject: [PATCH 2/2] Increase latency sample size by a factor of 10. --- fdbserver/Knobs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index f7409ff9ed..eddf4c5a95 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -531,7 +531,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6 ); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; } // Server request latency measurement - init( LATENCY_SAMPLE_SIZE, 10000 ); + init( LATENCY_SAMPLE_SIZE, 100000 ); init( LATENCY_METRICS_LOGGING_INTERVAL, 60.0 ); // clang-format on