From daea43bdca4b88aece7e0502ff1eeefedf7f3189 Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Mon, 11 Jun 2018 11:13:44 -0700 Subject: [PATCH 01/26] Add administration and TLS sections to the site map. #264 --- documentation/sphinx/source/administration.rst | 1 + documentation/sphinx/source/index.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 5cf4d28274..10749bee10 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -10,6 +10,7 @@ Administration :titlesonly: moving-a-cluster + tls This document covers the administration of an existing FoundationDB cluster. We recommend you read this document before setting up a cluster for performance testing or production use. diff --git a/documentation/sphinx/source/index.rst b/documentation/sphinx/source/index.rst index af11c6dd9e..da6b1b8901 100644 --- a/documentation/sphinx/source/index.rst +++ b/documentation/sphinx/source/index.rst @@ -50,4 +50,5 @@ The latest changes are detailed in :doc:`release-notes`. 
The documentation has t design-recipes api-reference tutorials + administration earlier-release-notes From 8ae25bed5c6404be4e9efca47030c2ee960c7c67 Mon Sep 17 00:00:00 2001 From: xmeng Date: Tue, 12 Jun 2018 21:01:19 +0100 Subject: [PATCH 02/26] Fix a concurrency bug in Java queue example firstItem() should be in the same transaction of clear() --- documentation/sphinx/source/queues-java.rst | 17 ++++++++--------- recipes/java-recipes/MicroQueue.java | 17 ++++++++--------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/documentation/sphinx/source/queues-java.rst b/documentation/sphinx/source/queues-java.rst index 3d5019606d..4e141d0202 100644 --- a/documentation/sphinx/source/queues-java.rst +++ b/documentation/sphinx/source/queues-java.rst @@ -81,19 +81,18 @@ The following is a simple implementation of the basic pattern: // Remove the top element from the queue. public static Object dequeue(TransactionContext tcx){ - final KeyValue item = firstItem(tcx); - if(item == null){ - return null; - } - // Remove from the top of the queue. - tcx.run((Transaction tr) -> { + return tcx.run((Transaction tr) -> { + final KeyValue item = firstItem(tr); + if(item == null){ + return null; + } + tr.clear(item.getKey()); - return null; + // Return the old value. + return Tuple.fromBytes(item.getValue()).get(0); }); - // Return the old value. - return Tuple.fromBytes(item.getValue()).get(0); } // Add an element to the queue. diff --git a/recipes/java-recipes/MicroQueue.java b/recipes/java-recipes/MicroQueue.java index a3a805a067..4b0e0fbc71 100644 --- a/recipes/java-recipes/MicroQueue.java +++ b/recipes/java-recipes/MicroQueue.java @@ -44,21 +44,20 @@ public class MicroQueue { // Remove the top element from the queue. public static Object dequeue(TransactionContext tcx){ - final KeyValue item = firstItem(tcx); - if(item == null){ - return null; - } - // Remove from the top of the queue. 
- tcx.run(new Function(){ + return tcx.run(new Function(){ public Void apply(Transaction tr){ + final KeyValue item = firstItem(tr); + if(item == null){ + return null; + } + tr.clear(item.getKey()); - return null; + // Return the old value. + return Tuple.fromBytes(item.getValue()).get(0); } }); - // Return the old value. - return Tuple.fromBytes(item.getValue()).get(0); } // Add an element to the queue. From 6f941a89b4ea2abe331e43d5ff95291c85cae8ec Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Wed, 13 Jun 2018 10:15:20 -0700 Subject: [PATCH 03/26] Fix bug in actor compiler that would cause multi-line comments to be marked with the wrong line numbers. --- flow/actorcompiler/ActorParser.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/actorcompiler/ActorParser.cs b/flow/actorcompiler/ActorParser.cs index c0b374c85f..e4b1391bef 100644 --- a/flow/actorcompiler/ActorParser.cs +++ b/flow/actorcompiler/ActorParser.cs @@ -874,13 +874,13 @@ namespace actorcompiler case "\r\n": LineCount++; break; case "\n": LineCount++; break; } - if (tokens[i].Value.StartsWith("/*")) LineCount += tokens[i].Value.Count(c=>c=='\n'); if (BraceDepth < 0) throw new Error(LineCount, "Mismatched braces"); if (ParenDepth < 0) throw new Error(LineCount, "Mismatched parenthesis"); tokens[i].Position = i; tokens[i].SourceLine = LineCount; tokens[i].BraceDepth = BraceDepth; tokens[i].ParenDepth = ParenDepth; + if (tokens[i].Value.StartsWith("/*")) LineCount += tokens[i].Value.Count(c=>c=='\n'); switch (tokens[i].Value) { case "{": BraceDepth++; if (BraceDepth==1) lastBrace = tokens[i]; break; From 4a87a6c8e1b21ce37c09b66295aaf5607bf27591 Mon Sep 17 00:00:00 2001 From: LingBin Date: Thu, 14 Jun 2018 12:28:31 +0800 Subject: [PATCH 04/26] Fix the indentation of the administration document Incorrect indentation can lead to incorrect line breaks. In sphinx, the source code needs to be indented. 
commit to the release-5.2 branch, which is where the next version of the website documentation is likely to be built from. --- .../sphinx/source/administration.rst | 318 +++++++++--------- 1 file changed, 159 insertions(+), 159 deletions(-) diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 5c0eb13c88..fd87c7a21c 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -228,46 +228,45 @@ Use the ``status`` command of ``fdbcli`` to determine if the cluster is up and r The database is available. Welcome to the fdbcli. For help, type `help'. -fdb> status + fdb> status -Configuration: - Redundancy mode - triple - Storage engine - ssd-2 - Coordinators - 5 - Desired Proxies - 5 - Desired Logs - 8 + Configuration: + Redundancy mode - triple + Storage engine - ssd-2 + Coordinators - 5 + Desired Proxies - 5 + Desired Logs - 8 -Cluster: - FoundationDB processes - 272 - Machines - 16 - Memory availability - 14.5 GB per process on machine with least available - Retransmissions rate - 20 Hz - Fault Tolerance - 2 machines - Server time - 03/19/18 08:51:52 - -Data: - Replication health - Healthy - Moving data - 0.000 GB - Sum of key-value sizes - 3.298 TB - Disk space used - 15.243 TB - -Operating space: - Storage server - 1656.2 GB free on most full server - Log server - 1794.7 GB free on most full server - -Workload: - Read rate - 55990 Hz - Write rate - 14946 Hz - Transactions started - 6321 Hz - Transactions committed - 1132 Hz - Conflict rate - 0 Hz - -Backup and DR: - Running backups - 1 - Running DRs - 1 as primary - -Client time: 03/19/18 08:51:51 + Cluster: + FoundationDB processes - 272 + Machines - 16 + Memory availability - 14.5 GB per process on machine with least available + Retransmissions rate - 20 Hz + Fault Tolerance - 2 machines + Server time - 03/19/18 08:51:52 + Data: + Replication health - Healthy + Moving data - 0.000 GB + Sum of 
key-value sizes - 3.298 TB + Disk space used - 15.243 TB + + Operating space: + Storage server - 1656.2 GB free on most full server + Log server - 1794.7 GB free on most full server + + Workload: + Read rate - 55990 Hz + Write rate - 14946 Hz + Transactions started - 6321 Hz + Transactions committed - 1132 Hz + Conflict rate - 0 Hz + + Backup and DR: + Running backups - 1 + Running DRs - 1 as primary + + Client time: 03/19/18 08:51:51 The summary fields are interpreted as follows: @@ -327,131 +326,132 @@ The ``status`` command can provide detailed statistics about the cluster and the fdb> status details -Configuration: - Redundancy mode - triple - Storage engine - ssd-2 - Coordinators - 5 + Configuration: + Redundancy mode - triple + Storage engine - ssd-2 + Coordinators - 5 + + Cluster: + FoundationDB processes - 85 + Machines - 5 + Memory availability - 7.4 GB per process on machine with least available + Retransmissions rate - 5 Hz + Fault Tolerance - 2 machines + Server time - 03/19/18 08:59:37 + + Data: + Replication health - Healthy + Moving data - 0.000 GB + Sum of key-value sizes - 87.068 GB + Disk space used - 327.819 GB + + Operating space: + Storage server - 888.2 GB free on most full server + Log server - 897.3 GB free on most full server + + Workload: + Read rate - 117 Hz + Write rate - 0 Hz + Transactions started - 43 Hz + Transactions committed - 1 Hz + Conflict rate - 0 Hz -Cluster: - FoundationDB processes - 85 - Machines - 5 - Memory availability - 7.4 GB per process on machine with least available - Retransmissions rate - 5 Hz - Fault Tolerance - 2 machines - Server time - 03/19/18 08:59:37 + Process performance details: + 10.0.4.1:4500 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.1:4501 ( 1% cpu; 2% machine; 0.010 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4502 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4503 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB 
RAM ) + 10.0.4.1:4504 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4505 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4506 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4507 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4508 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4509 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4510 ( 1% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4511 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4512 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.1:4513 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.1:4514 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.1:4515 ( 12% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.1:4516 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.3 GB / 7.4 GB RAM ) + 10.0.4.2:4500 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.2:4501 ( 15% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4502 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4503 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4504 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4505 ( 18% cpu; 3% machine; 0.124 Gbps; 18% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4506 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4507 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4508 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4509 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4510 ( 0% cpu; 3% machine; 0.124 Gbps; 0% 
disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4511 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4512 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.2:4513 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.2:4514 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.2:4515 ( 11% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.2:4516 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) + 10.0.4.3:4500 ( 14% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 3.0 GB / 7.4 GB RAM ) + 10.0.4.3:4501 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) + 10.0.4.3:4502 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) + 10.0.4.3:4503 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4504 ( 7% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4505 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4506 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4507 ( 2% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4508 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4509 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4510 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4511 ( 2% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4512 ( 2% cpu; 3% machine; 0.284 Gbps; 3% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.3:4513 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.3:4514 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) + 10.0.4.3:4515 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) + 10.0.4.3:4516 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) + 10.0.4.4:4500 ( 2% cpu; 4% 
machine; 0.065 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.4:4501 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4502 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4503 ( 2% cpu; 4% machine; 0.065 Gbps; 16% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4504 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.4:4505 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4506 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4507 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4508 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4509 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4510 ( 24% cpu; 4% machine; 0.065 Gbps; 15% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4511 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) + 10.0.4.4:4512 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.4:4513 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.4:4514 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.4:4515 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.4:4516 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.6 GB / 7.4 GB RAM ) + 10.0.4.5:4500 ( 6% cpu; 2% machine; 0.076 Gbps; 7% disk IO; 3.2 GB / 7.4 GB RAM ) + 10.0.4.5:4501 ( 2% cpu; 2% machine; 0.076 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4502 ( 1% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4503 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4504 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4505 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4506 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 
10.0.4.5:4507 ( 2% cpu; 2% machine; 0.076 Gbps; 6% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4508 ( 31% cpu; 2% machine; 0.076 Gbps; 8% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4509 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4510 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) + 10.0.4.5:4511 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4512 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4513 ( 0% cpu; 2% machine; 0.076 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) + 10.0.4.5:4514 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.5:4515 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) + 10.0.4.5:4516 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) -Data: - Replication health - Healthy - Moving data - 0.000 GB - Sum of key-value sizes - 87.068 GB - Disk space used - 327.819 GB + Coordination servers: + 10.0.4.1:4500 (reachable) + 10.0.4.2:4500 (reachable) + 10.0.4.3:4500 (reachable) + 10.0.4.4:4500 (reachable) + 10.0.4.5:4500 (reachable) + + Client time: 03/19/18 08:59:37 -Operating space: - Storage server - 888.2 GB free on most full server - Log server - 897.3 GB free on most full server - -Workload: - Read rate - 117 Hz - Write rate - 0 Hz - Transactions started - 43 Hz - Transactions committed - 1 Hz - Conflict rate - 0 Hz - -Process performance details: - 10.0.4.1:4500 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.1:4501 ( 1% cpu; 2% machine; 0.010 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4502 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4503 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4504 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4505 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4506 ( 2% cpu; 2% 
machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4507 ( 2% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4508 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4509 ( 2% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4510 ( 1% cpu; 2% machine; 0.010 Gbps; 1% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4511 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4512 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.1:4513 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.1:4514 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.1:4515 ( 12% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.1:4516 ( 0% cpu; 2% machine; 0.010 Gbps; 0% disk IO; 0.3 GB / 7.4 GB RAM ) - 10.0.4.2:4500 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.2:4501 ( 15% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4502 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4503 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4504 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4505 ( 18% cpu; 3% machine; 0.124 Gbps; 18% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4506 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4507 ( 2% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4508 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4509 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4510 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4511 ( 2% cpu; 3% machine; 0.124 Gbps; 1% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4512 ( 2% cpu; 3% machine; 0.124 Gbps; 19% disk IO; 2.7 GB / 7.4 GB RAM ) - 
10.0.4.2:4513 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.2:4514 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.2:4515 ( 11% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.2:4516 ( 0% cpu; 3% machine; 0.124 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) - 10.0.4.3:4500 ( 14% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 3.0 GB / 7.4 GB RAM ) - 10.0.4.3:4501 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) - 10.0.4.3:4502 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) - 10.0.4.3:4503 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4504 ( 7% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4505 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4506 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4507 ( 2% cpu; 3% machine; 0.284 Gbps; 26% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4508 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4509 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4510 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4511 ( 2% cpu; 3% machine; 0.284 Gbps; 12% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4512 ( 2% cpu; 3% machine; 0.284 Gbps; 3% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.3:4513 ( 2% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.3:4514 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) - 10.0.4.3:4515 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) - 10.0.4.3:4516 ( 0% cpu; 3% machine; 0.284 Gbps; 0% disk IO; 0.1 GB / 7.4 GB RAM ) - 10.0.4.4:4500 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.4:4501 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4502 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 
2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4503 ( 2% cpu; 4% machine; 0.065 Gbps; 16% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4504 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.4:4505 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4506 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4507 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4508 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4509 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4510 ( 24% cpu; 4% machine; 0.065 Gbps; 15% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4511 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.8 GB / 7.4 GB RAM ) - 10.0.4.4:4512 ( 2% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.4:4513 ( 0% cpu; 4% machine; 0.065 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.4:4514 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.4:4515 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.4:4516 ( 0% cpu; 4% machine; 0.065 Gbps; 1% disk IO; 0.6 GB / 7.4 GB RAM ) - 10.0.4.5:4500 ( 6% cpu; 2% machine; 0.076 Gbps; 7% disk IO; 3.2 GB / 7.4 GB RAM ) - 10.0.4.5:4501 ( 2% cpu; 2% machine; 0.076 Gbps; 19% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4502 ( 1% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4503 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4504 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4505 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4506 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4507 ( 2% cpu; 2% machine; 0.076 Gbps; 6% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4508 ( 31% cpu; 2% machine; 0.076 Gbps; 8% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4509 ( 0% cpu; 2% machine; 
0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4510 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.7 GB / 7.4 GB RAM ) - 10.0.4.5:4511 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4512 ( 2% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4513 ( 0% cpu; 2% machine; 0.076 Gbps; 3% disk IO; 2.6 GB / 7.4 GB RAM ) - 10.0.4.5:4514 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.5:4515 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.2 GB / 7.4 GB RAM ) - 10.0.4.5:4516 ( 0% cpu; 2% machine; 0.076 Gbps; 0% disk IO; 0.6 GB / 7.4 GB RAM ) - -Coordination servers: - 10.0.4.1:4500 (reachable) - 10.0.4.2:4500 (reachable) - 10.0.4.3:4500 (reachable) - 10.0.4.4:4500 (reachable) - 10.0.4.5:4500 (reachable) - -Client time: 03/19/18 08:59:37 Several details about individual FoundationDB processes are displayed in a list format in parenthesis after the IP address and port: ======= ========================================================================= From e0c72b31f4eaf71efef09ee98183458b1e9ed9e2 Mon Sep 17 00:00:00 2001 From: Richard Low Date: Tue, 19 Jun 2018 13:42:18 -0700 Subject: [PATCH 05/26] Add UID and DC as additional subject fields for TLS peer validation --- FDBLibTLS/FDBLibTLSVerify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FDBLibTLS/FDBLibTLSVerify.cpp b/FDBLibTLS/FDBLibTLSVerify.cpp index b951ab5810..0c28a2f036 100644 --- a/FDBLibTLS/FDBLibTLSVerify.cpp +++ b/FDBLibTLS/FDBLibTLSVerify.cpp @@ -136,7 +136,7 @@ static std::pair splitPair(std::string const& input, c static int abbrevToNID(std::string const& sn) { int nid = NID_undef; - if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU") + if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU" || sn == "UID" || sn == "DC") nid = OBJ_sn2nid(sn.c_str()); if (nid == NID_undef) throw std::runtime_error("abbrevToNID"); From 
fff6a47c43b182aa29a6ff171b33005fec3cc22b Mon Sep 17 00:00:00 2001 From: Richard Low Date: Wed, 20 Jun 2018 14:04:03 -0700 Subject: [PATCH 06/26] Validate certificates by default --- fdbrpc/TLSConnection.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbrpc/TLSConnection.actor.cpp b/fdbrpc/TLSConnection.actor.cpp index 965703491d..39bedf5355 100644 --- a/fdbrpc/TLSConnection.actor.cpp +++ b/fdbrpc/TLSConnection.actor.cpp @@ -323,7 +323,7 @@ Reference TLSOptions::get_policy(PolicyType type) { if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verifyPeerString)) set_verify_peers({ verifyPeerString }); else - set_verify_peers({ std::string("Check.Valid=0")}); + set_verify_peers({ std::string("Check.Valid=1")}); } if (!ca_set) { std::string caFile; From b161e25fbd4a893fae2f15a45a05c1432f3b0ff4 Mon Sep 17 00:00:00 2001 From: Richard Low Date: Wed, 20 Jun 2018 14:41:31 -0700 Subject: [PATCH 07/26] Update release notes --- documentation/sphinx/source/release-notes.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index f81e95a84c..a1d16413aa 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -2,6 +2,14 @@ Release Notes ############# +5.2.5 +===== + +Fixes +----- + +* Don't disable certificate checks by default. 
`(PR #511) <https://github.com/apple/foundationdb/pull/511>`_ + 5.2.4 ===== From 361e3357301b93de496a81cf07d2d2eb4e7ec416 Mon Sep 17 00:00:00 2001 From: Richard Low Date: Wed, 20 Jun 2018 16:26:11 -0700 Subject: [PATCH 08/26] Disable cert validation in simulation --- fdbserver/SimulatedCluster.actor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index d4466eb7c4..70a651c30d 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -120,6 +120,7 @@ static void simInitTLS() { Reference options( new TLSOptions ); options->set_cert_data( certBytes ); options->set_key_data( certBytes ); + options->set_verify_peers(std::vector(1, "Check.Valid=0")); options->register_network(); } From e9e1e194f03b6e2d9963fd91d15ccc0dac3812e0 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 20 Jun 2018 20:34:34 -0700 Subject: [PATCH 09/26] Added operation-specific rate controls to blob store interface. 
--- fdbclient/Knobs.cpp | 5 +++++ fdbclient/Knobs.h | 4 ++++ fdbrpc/BlobStore.actor.cpp | 30 +++++++++++++++++++++++++++++- fdbrpc/BlobStore.h | 16 ++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index 3a2974ba57..cfc55addb2 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -164,6 +164,11 @@ ClientKnobs::ClientKnobs(bool randomize) { init( BLOBSTORE_MAX_SEND_BYTES_PER_SECOND, 1e9 ); init( BLOBSTORE_MAX_RECV_BYTES_PER_SECOND, 1e9 ); + init( BLOBSTORE_LIST_REQUESTS_PER_SECOND, 25 ); + init( BLOBSTORE_WRITE_REQUESTS_PER_SECOND, 50 ); + init( BLOBSTORE_READ_REQUESTS_PER_SECOND, 100 ); + init( BLOBSTORE_DELETE_REQUESTS_PER_SECOND, 100 ); + // Client Status Info init(CSI_SAMPLING_PROBABILITY, -1.0); init(CSI_SIZE_LIMIT, std::numeric_limits::max()); diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 3b69170ae2..2e24e197bb 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -151,6 +151,10 @@ public: int BLOBSTORE_REQUEST_TRIES; int BLOBSTORE_REQUEST_TIMEOUT; int BLOBSTORE_REQUESTS_PER_SECOND; + int BLOBSTORE_LIST_REQUESTS_PER_SECOND; + int BLOBSTORE_WRITE_REQUESTS_PER_SECOND; + int BLOBSTORE_READ_REQUESTS_PER_SECOND; + int BLOBSTORE_DELETE_REQUESTS_PER_SECOND; int BLOBSTORE_CONCURRENT_REQUESTS; int BLOBSTORE_MULTIPART_MAX_PART_SIZE; int BLOBSTORE_MULTIPART_MIN_PART_SIZE; diff --git a/fdbrpc/BlobStore.actor.cpp b/fdbrpc/BlobStore.actor.cpp index f1ab6a95d3..f8229e4cca 100644 --- a/fdbrpc/BlobStore.actor.cpp +++ b/fdbrpc/BlobStore.actor.cpp @@ -57,6 +57,10 @@ BlobStoreEndpoint::BlobKnobs::BlobKnobs() { request_timeout = CLIENT_KNOBS->BLOBSTORE_REQUEST_TIMEOUT; requests_per_second = CLIENT_KNOBS->BLOBSTORE_REQUESTS_PER_SECOND; concurrent_requests = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_REQUESTS; + list_requests_per_second = CLIENT_KNOBS->BLOBSTORE_LIST_REQUESTS_PER_SECOND; + write_requests_per_second = CLIENT_KNOBS->BLOBSTORE_WRITE_REQUESTS_PER_SECOND; + read_requests_per_second = 
CLIENT_KNOBS->BLOBSTORE_READ_REQUESTS_PER_SECOND; + delete_requests_per_second = CLIENT_KNOBS->BLOBSTORE_DELETE_REQUESTS_PER_SECOND; multipart_max_part_size = CLIENT_KNOBS->BLOBSTORE_MULTIPART_MAX_PART_SIZE; multipart_min_part_size = CLIENT_KNOBS->BLOBSTORE_MULTIPART_MIN_PART_SIZE; concurrent_uploads = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_UPLOADS; @@ -79,6 +83,10 @@ bool BlobStoreEndpoint::BlobKnobs::set(StringRef name, int value) { TRY_PARAM(request_tries, rt); TRY_PARAM(request_timeout, rto); TRY_PARAM(requests_per_second, rps); + TRY_PARAM(list_requests_per_second, lrps); + TRY_PARAM(write_requests_per_second, wrps); + TRY_PARAM(read_requests_per_second, rrps); + TRY_PARAM(delete_requests_per_second, drps); TRY_PARAM(concurrent_requests, cr); TRY_PARAM(multipart_max_part_size, maxps); TRY_PARAM(multipart_min_part_size, minps); @@ -107,6 +115,10 @@ std::string BlobStoreEndpoint::BlobKnobs::getURLParameters() const { _CHECK_PARAM(request_tries, rt); _CHECK_PARAM(request_timeout, rto); _CHECK_PARAM(requests_per_second, rps); + _CHECK_PARAM(list_requests_per_second, lrps); + _CHECK_PARAM(write_requests_per_second, wrps); + _CHECK_PARAM(read_requests_per_second, rrps); + _CHECK_PARAM(delete_requests_per_second, drps); _CHECK_PARAM(concurrent_requests, cr); _CHECK_PARAM(multipart_max_part_size, maxps); _CHECK_PARAM(multipart_min_part_size, minps); @@ -195,6 +207,8 @@ std::string BlobStoreEndpoint::getResourceURL(std::string resource) { } ACTOR Future objectExists_impl(Reference b, std::string bucket, std::string object) { + Void _ = wait(b->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; @@ -207,6 +221,8 @@ Future BlobStoreEndpoint::objectExists(std::string const &bucket, std::str } ACTOR Future deleteObject_impl(Reference b, std::string bucket, std::string object) { + Void _ = wait(b->requestRateDelete->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; 
HTTP::Headers headers; Reference r = wait(b->doRequest("DELETE", resource, headers, NULL, 0, {200, 204, 404})); @@ -273,9 +289,10 @@ Future BlobStoreEndpoint::deleteRecursively(std::string const &bucket, std } ACTOR Future createBucket_impl(Reference b, std::string bucket) { + Void _ = wait(b->requestRateWrite->getAllowance(1)); + std::string resource = std::string("/") + bucket; HTTP::Headers headers; - Reference r = wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409})); return Void(); } @@ -285,6 +302,8 @@ Future BlobStoreEndpoint::createBucket(std::string const &bucket) { } ACTOR Future objectSize_impl(Reference b, std::string bucket, std::string object) { + Void _ = wait(b->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; @@ -789,6 +808,8 @@ void BlobStoreEndpoint::setAuthHeaders(std::string const &verb, std::string cons } ACTOR Future readEntireFile_impl(Reference bstore, std::string bucket, std::string object) { + Void _ = wait(bstore->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; Reference r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 404})); @@ -805,6 +826,7 @@ ACTOR Future writeEntireFileFromBuffer_impl(Reference b if(contentLen > bstore->knobs.multipart_max_part_size) throw file_too_large(); + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); Void _ = wait(bstore->concurrentUploads.take()); state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1); @@ -856,6 +878,8 @@ Future BlobStoreEndpoint::writeEntireFileFromBuffer(std::string const &buc ACTOR Future readObject_impl(Reference bstore, std::string bucket, std::string object, void *data, int length, int64_t offset) { if(length <= 0) return 0; + Void _ = wait(bstore->requestRateRead->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; 
headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1); @@ -874,6 +898,8 @@ Future BlobStoreEndpoint::readObject(std::string const &bucket, std::string } ACTOR static Future beginMultiPartUpload_impl(Reference bstore, std::string bucket, std::string object) { + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); + std::string resource = std::string("/") + bucket + "/" + object + "?uploads"; HTTP::Headers headers; Reference r = wait(bstore->doRequest("POST", resource, headers, NULL, 0, {200})); @@ -892,6 +918,7 @@ Future BlobStoreEndpoint::beginMultiPartUpload(std::string const &b } ACTOR Future uploadPart_impl(Reference bstore, std::string bucket, std::string object, std::string uploadID, unsigned int partNumber, UnsentPacketQueue *pContent, int contentLen, std::string contentMD5) { + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); Void _ = wait(bstore->concurrentUploads.take()); state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1); @@ -921,6 +948,7 @@ Future BlobStoreEndpoint::uploadPart(std::string const &bucket, std ACTOR Future finishMultiPartUpload_impl(Reference bstore, std::string bucket, std::string object, std::string uploadID, BlobStoreEndpoint::MultiPartSetT parts) { state UnsentPacketQueue part_list(); // NonCopyable state var so must be declared at top of actor + Void _ = wait(bstore->requestRateWrite->getAllowance(1)); std::string manifest = ""; for(auto &p : parts) diff --git a/fdbrpc/BlobStore.h b/fdbrpc/BlobStore.h index 000f0569d0..8adfbe83a2 100644 --- a/fdbrpc/BlobStore.h +++ b/fdbrpc/BlobStore.h @@ -55,6 +55,10 @@ public: request_tries, request_timeout, requests_per_second, + list_requests_per_second, + write_requests_per_second, + read_requests_per_second, + delete_requests_per_second, multipart_max_part_size, multipart_min_part_size, concurrent_requests, @@ -78,6 +82,10 @@ public: "request_tries (or rt) Number of times to try each request until a parseable HTTP response other than 429 is 
received.", "request_timeout (or rto) Number of seconds to wait for a request to succeed after a connection is established.", "requests_per_second (or rps) Max number of requests to start per second.", + "list_requests_per_second (or lrps) Max number of list requests to start per second.", + "write_requests_per_second (or wrps) Max number of write requests to start per second.", + "read_requests_per_second (or rrps) Max number of read requests to start per second.", + "delete_requests_per_second (or drps) Max number of delete requests to start per second.", "multipart_max_part_size (or maxps) Max part size for multipart uploads.", "multipart_min_part_size (or minps) Min part size for multipart uploads.", "concurrent_requests (or cr) Max number of total requests in progress at once, regardless of operation-specific concurrency limits.", @@ -97,6 +105,10 @@ public: BlobStoreEndpoint(std::string const &host, std::string service, std::string const &key, std::string const &secret, BlobKnobs const &knobs = BlobKnobs()) : host(host), service(service), key(key), secret(secret), lookupSecret(secret.empty()), knobs(knobs), requestRate(new SpeedLimit(knobs.requests_per_second, 1)), + requestRateList(new SpeedLimit(knobs.list_requests_per_second, 1)), + requestRateWrite(new SpeedLimit(knobs.write_requests_per_second, 1)), + requestRateRead(new SpeedLimit(knobs.read_requests_per_second, 1)), + requestRateDelete(new SpeedLimit(knobs.delete_requests_per_second, 1)), sendRate(new SpeedLimit(knobs.max_send_bytes_per_second, 1)), recvRate(new SpeedLimit(knobs.max_recv_bytes_per_second, 1)), concurrentRequests(knobs.concurrent_requests), @@ -135,6 +147,10 @@ public: // Speed and concurrency limits Reference requestRate; + Reference requestRateList; + Reference requestRateWrite; + Reference requestRateRead; + Reference requestRateDelete; Reference sendRate; Reference recvRate; FlowLock concurrentRequests; From d9f3eb05a2117fd87741c09829b4a19857425934 Mon Sep 17 00:00:00 2001 From: 
Stephen Atherton Date: Thu, 21 Jun 2018 11:13:31 -0700 Subject: [PATCH 10/26] Change default delete operations per second. Updated release notes. --- documentation/sphinx/source/release-notes.rst | 6 ++++++ fdbclient/Knobs.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index a1d16413aa..1e0f4caa26 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -5,11 +5,17 @@ Release Notes 5.2.5 ===== +Features +-------- + +* Added knobs and blob Backup URL parameters for operations/sec limits by operation type. `(PR #513) https://github.com/apple/foundationdb/pull/513`_ + Fixes ----- * Don't disable certificate checks by default. `(PR #511) https://github.com/apple/foundationdb/pull/511`_ + 5.2.4 ===== diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index cfc55addb2..9fd318e09c 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -167,7 +167,7 @@ ClientKnobs::ClientKnobs(bool randomize) { init( BLOBSTORE_LIST_REQUESTS_PER_SECOND, 25 ); init( BLOBSTORE_WRITE_REQUESTS_PER_SECOND, 50 ); init( BLOBSTORE_READ_REQUESTS_PER_SECOND, 100 ); - init( BLOBSTORE_DELETE_REQUESTS_PER_SECOND, 100 ); + init( BLOBSTORE_DELETE_REQUESTS_PER_SECOND, 200 ); // Client Status Info init(CSI_SAMPLING_PROBABILITY, -1.0); From 011f0ce7c03a133fc6433268a4b1ecda42894c73 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 21 Jun 2018 12:16:13 -0700 Subject: [PATCH 11/26] reordered the release notes --- documentation/sphinx/source/release-notes.rst | 62 +++---------------- 1 file changed, 9 insertions(+), 53 deletions(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 1e0f4caa26..9ed5a748ee 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -8,65 +8,14 @@ Release Notes Features 
-------- -* Added knobs and blob Backup URL parameters for operations/sec limits by operation type. `(PR #513) https://github.com/apple/foundationdb/pull/513`_ - -Fixes ------ - -* Don't disable certificate checks by default. `(PR #511) https://github.com/apple/foundationdb/pull/511`_ - - -5.2.4 -===== - -Features --------- - -* tls_verify_peers splits input using the '|' character. `(PR #468) https://github.com/apple/foundationdb/pull/468`_ - -Fixes ------ - -* fdbcli kill command did not work when TLS was enabled. `(PR #471) https://github.com/apple/foundationdb/pull/471`_ - -5.2.3 -===== - -Fixes ------ - -* Backup would attempt to clear too many ranges in a single transaction when erasing log ranges. `(PR #440) https://github.com/apple/foundationdb/pull/440`_ -* A read-only transaction using the ``READ_LOCK_AWARE`` option would fail if committed. `(PR #437) https://github.com/apple/foundationdb/pull/437`_ - -5.2.2 -===== - -Fixes ------ - -* Build would fail on recent versions of Clang. `(PR #389) https://github.com/apple/foundationdb/pull/389/files`_ -* Clusters running with TLS plugin would reject clients using non-server certificates. `(PR #396) https://github.com/apple/foundationdb/pull/396`_ - -5.2.1 -===== - -Fixes ------ - -* Client input validation would handle inputs to versionstamp mutations incorrectly if the API version was less than 520. `(Issue #387) `_ - -5.2.0 -===== - -Features --------- - * Backup and DR share a single mutation log when both are being used on the same cluster. Ongoing backups will be aborted when upgrading to 5.2. `(PR #3) `_ * Added a TLS plugin implementation. `(PR #343) `_ * Backup supports HTTPS for blobstore connections. `(PR #343) `_ * Added the APPEND_IF_FITS atomic operation. `(PR #22) `_ * Updated the SET_VERSIONSTAMPED_KEY atomic operation to take four bytes to specify the offset instead of two (if the API version is set to 520 or higher). 
`(Issue #148) `_ * Updated the SET_VERSIONSTAMPED_VALUE atomic operation to place the versionstamp at a specified offset in a value (if the API version is set to 520 or higher). `(Issue #148) `_ +* tls_verify_peers splits input using the '|' character. [5.2.4] `(PR #468) <https://github.com/apple/foundationdb/pull/468>`_ +* Added knobs and blob Backup URL parameters for operations/sec limits by operation type. [5.2.5] `(PR #513) <https://github.com/apple/foundationdb/pull/513>`_ Performance ----------- @@ -77,6 +26,13 @@ Fixes ----- * The client did not clear the storage server interface cache on endpoint failure for all request types. This causes up to one second of additional latency on the first get range request to a rebooted storage server. `(Issue #351) `_ +* Client input validation would handle inputs to versionstamp mutations incorrectly if the API version was less than 520. [5.2.1] `(Issue #387) `_ +* Build would fail on recent versions of Clang. [5.2.2] `(PR #389) <https://github.com/apple/foundationdb/pull/389/files>`_ +* Clusters running with TLS plugin would reject clients using non-server certificates. [5.2.2] `(PR #396) <https://github.com/apple/foundationdb/pull/396>`_ +* Backup would attempt to clear too many ranges in a single transaction when erasing log ranges. [5.2.3] `(PR #440) <https://github.com/apple/foundationdb/pull/440>`_ +* A read-only transaction using the ``READ_LOCK_AWARE`` option would fail if committed. [5.2.3] `(PR #437) <https://github.com/apple/foundationdb/pull/437>`_ +* fdbcli kill command did not work when TLS was enabled. [5.2.4] `(PR #471) <https://github.com/apple/foundationdb/pull/471>`_ +* Don't disable certificate checks by default. [5.2.5] `(PR #511) <https://github.com/apple/foundationdb/pull/511>`_ Status ------ From ea5aa51e085978fbe98ea0c19561d7a6ffe55e00 Mon Sep 17 00:00:00 2001 From: Bhaskar Muppana Date: Thu, 21 Jun 2018 14:01:19 -0700 Subject: [PATCH 12/26] 5.2.5 release related changes.
--- documentation/sphinx/source/downloads.rst | 24 +++++++++++------------ packaging/msi/FDBInstaller.wxs | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index f3b6df0858..8fc3dc3f00 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-5.2.4.pkg `_ +* `FoundationDB-5.2.5.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. -* `foundationdb-clients-5.2.4-1_amd64.deb `_ -* `foundationdb-server-5.2.4-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-5.2.5-1_amd64.deb `_ +* `foundationdb-server-5.2.5-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. -* `foundationdb-clients-5.2.4-1.el6.x86_64.rpm `_ -* `foundationdb-server-5.2.4-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-5.2.5-1.el6.x86_64.rpm `_ +* `foundationdb-server-5.2.5-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. -* `foundationdb-clients-5.2.4-1.el7.x86_64.rpm `_ -* `foundationdb-server-5.2.4-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-5.2.5-1.el7.x86_64.rpm `_ +* `foundationdb-server-5.2.5-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. 
-* `foundationdb-5.2.4-x64.msi `_ +* `foundationdb-5.2.5-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package: -* `foundationdb-5.2.4.tar.gz `_ +* `foundationdb-5.2.5.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-5.2.4.gem `_ +* `fdb-5.2.5.gem `_ Java 8+ ------- -* `fdb-java-5.2.4.jar `_ -* `fdb-java-5.2.4-javadoc.jar `_ +* `fdb-java-5.2.5.jar `_ +* `fdb-java-5.2.5-javadoc.jar `_ Go 1.1+ ------- diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index ec9d21462f..ecf1eea713 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ Date: Thu, 21 Jun 2018 15:59:43 -0700 Subject: [PATCH 13/26] Add metrics for watches and mutations on the storage server. The storage server tracks its lag with the logs, and status tries to report a more accurate measure of this lag. 
--- documentation/StatusSchema.json | 5 ++- fdbserver/Status.actor.cpp | 14 +++++++- fdbserver/storageserver.actor.cpp | 43 +++++++++++++++++++++++- tests/fast/SidebandWithStatus.txt | 2 +- tests/rare/LargeApiCorrectnessStatus.txt | 2 +- tests/slow/DDBalanceAndRemoveStatus.txt | 2 +- 6 files changed, 62 insertions(+), 6 deletions(-) diff --git a/documentation/StatusSchema.json b/documentation/StatusSchema.json index 9ebbd6bd7c..744b0bac2c 100644 --- a/documentation/StatusSchema.json +++ b/documentation/StatusSchema.json @@ -62,7 +62,10 @@ ] }, "data_version":12341234, - "data_version_lag":12341234, + "data_lag": { + "seconds":5.0, + "versions":12341234 + }, "id":"eb84471d68c12d1d26f692a50000003f", "finished_queries":{ "hz":0.0, diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index a223fb7f3d..4ee4e73061 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -530,10 +530,22 @@ struct RolesInfo { Version version = parseInt64(extractAttribute(metrics, "Version")); obj["data_version"] = version; + int64_t versionLag = parseInt64(extractAttribute(metrics, "VersionLag")); if(maxTLogVersion > 0) { - obj["data_version_lag"] = std::max(0, maxTLogVersion - version); + // It's possible that the storage server hasn't talked to the logs recently, in which case it may not be aware of how far behind it is. + // To account for that, we also compute the version difference between each storage server and the tlog with the largest version. + // + // Because this data is only logged periodically, this difference will likely be an overestimate for the lag. We subtract off the logging interval + // in order to make this estimate a bounded underestimate instead. 
+ versionLag = std::max(versionLag, maxTLogVersion - version - SERVER_KNOBS->STORAGE_LOGGING_DELAY * SERVER_KNOBS->VERSIONS_PER_SECOND); } + StatusObject dataLag; + dataLag["versions"] = versionLag; + dataLag["seconds"] = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND; + + obj["data_lag"] = dataLag; + } catch (Error& e) { if(e.code() != error_code_attribute_not_found) throw e; diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index becea24b93..7cb8bcb218 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -348,6 +348,8 @@ public: NotifiedVersion oldestVersion; // See also storageVersion() NotifiedVersion durableVersion; // At least this version will be readable from storage after a power failure + int64_t versionLag; // An estimate for how many versions it takes for the data to move from the logs to this storage server + uint64_t logProtocol; Reference logSystem; @@ -365,6 +367,7 @@ public: AsyncMap watches; int64_t watchBytes; + int64_t numWatches; AsyncVar noRecentUpdates; double lastUpdate; @@ -399,9 +402,10 @@ public: struct Counters { CounterCollection cc; - Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried; + Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried, watchQueries; Counter bytesInput, bytesDurable, bytesFetched, mutationBytes; // Like bytesInput but without MVCC accounting + Counter mutations, setMutations, clearRangeMutations, atomicMutations; Counter updateBatches, updateVersions; Counter loops; @@ -414,10 +418,15 @@ public: finishedQueries("FinishedQueries", cc), rowsQueried("RowsQueried", cc), bytesQueried("BytesQueried", cc), + watchQueries("WatchQueries", cc), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), bytesFetched("BytesFetched", cc), mutationBytes("MutationBytes", cc), + mutations("Mutations", cc), + 
setMutations("SetMutations", cc), + clearRangeMutations("ClearRangeMutations", cc), + atomicMutations("AtomicMutations", cc), updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc) @@ -427,6 +436,7 @@ public: specialCounter(cc, "StorageVersion", [self](){ return self->storageVersion(); }); specialCounter(cc, "DurableVersion", [self](){ return self->durableVersion.get(); }); specialCounter(cc, "DesiredOldestVersion", [self](){ return self->desiredOldestVersion.get(); }); + specialCounter(cc, "VersionLag", [self](){ return self->versionLag; }); specialCounter(cc, "FetchKeysFetchActive", [self](){ return self->fetchKeysParallelismLock.activePermits(); }); specialCounter(cc, "FetchKeysWaiting", [self](){ return self->fetchKeysParallelismLock.waiters(); }); @@ -434,6 +444,8 @@ public: specialCounter(cc, "QueryQueueMax", [self](){ return self->getAndResetMaxQueryQueueSize(); }); specialCounter(cc, "BytesStored", [self](){ return self->metrics.byteSample.getEstimate(allKeys); }); + specialCounter(cc, "ActiveWatches", [self](){ return self->numWatches; }); + specialCounter(cc, "WatchBytes", [self](){ return self->watchBytes; }); specialCounter(cc, "KvstoreBytesUsed", [self](){ return self->storage.getStorageBytes().used; }); specialCounter(cc, "KvstoreBytesFree", [self](){ return self->storage.getStorageBytes().free; }); @@ -446,6 +458,7 @@ public: : instanceID(g_random->randomUniqueID().first()), storage(this, storage), db(db), lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0), + versionLag(0), updateEagerReads(0), shardChangeCounter(0), fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), @@ -759,6 +772,8 @@ ACTOR Future getValueQ( StorageServer* data, GetValueRequest req ) { ACTOR Future watchValue_impl( StorageServer* data, WatchValueRequest req ) { try { + ++data->counters.watchQueries; + if( req.debugID.present() ) g_traceBatch.addEvent("WatchValueDebug", req.debugID.get().first(), 
"watchValueQ.Before"); //.detail("TaskID", g_network->getCurrentTask()); @@ -791,11 +806,14 @@ ACTOR Future watchValue_impl( StorageServer* data, WatchValueRequest req ) return Void(); } + ++data->numWatches; data->watchBytes += ( req.key.expectedSize() + req.value.expectedSize() + 1000 ); try { Void _ = wait( watchFuture ); + --data->numWatches; data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 ); } catch( Error &e ) { + --data->numWatches; data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 ); throw; } @@ -2353,6 +2371,7 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) ++data->counters.updateBatches; data->lastTLogVersion = cursor->getMaxKnownVersion(); + data->versionLag = std::max(0, data->lastTLogVersion - data->version.get()); ASSERT(*pReceivedUpdate == false); *pReceivedUpdate = true; @@ -2476,6 +2495,28 @@ ACTOR Future update( StorageServer* data, bool* pReceivedUpdate ) updater.applyMutation(data, msg, ver); data->counters.mutationBytes += msg.totalSize(); + ++data->counters.mutations; + switch(msg.type) { + case MutationRef::SetValue: + ++data->counters.setMutations; + break; + case MutationRef::ClearRange: + ++data->counters.clearRangeMutations; + break; + case MutationRef::AddValue: + case MutationRef::And: + case MutationRef::AndV2: + case MutationRef::AppendIfFits: + case MutationRef::ByteMax: + case MutationRef::ByteMin: + case MutationRef::Max: + case MutationRef::Min: + case MutationRef::MinV2: + case MutationRef::Or: + case MutationRef::Xor: + ++data->counters.atomicMutations; + break; + } } else TraceEvent(SevError, "DiscardingPeekedData", data->thisServerID).detail("Mutation", msg.toString()).detail("Version", cloneCursor2->version().toString()); diff --git a/tests/fast/SidebandWithStatus.txt b/tests/fast/SidebandWithStatus.txt index 94f4616d9f..de96bb9414 100644 --- a/tests/fast/SidebandWithStatus.txt +++ b/tests/fast/SidebandWithStatus.txt @@ -5,7 +5,7 @@ 
testTitle=CloggedCausalConsistencyTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true
,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"tran
saction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} testName=RandomClogging testDuration=30.0 diff --git a/tests/rare/LargeApiCorrectnessStatus.txt b/tests/rare/LargeApiCorrectnessStatus.txt index 7db5388026..6f2bf3808d 100644 --- a/tests/rare/LargeApiCorrectnessStatus.txt +++ b/tests/rare/LargeApiCorrectnessStatus.txt @@ -24,4 +24,4 @@ testTitle=ApiCorrectnessTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true
,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"tran
saction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} diff --git a/tests/slow/DDBalanceAndRemoveStatus.txt b/tests/slow/DDBalanceAndRemoveStatus.txt index 7e07285c3c..01a7d7cc89 100644 --- a/tests/slow/DDBalanceAndRemoveStatus.txt +++ b/tests/slow/DDBalanceAndRemoveStatus.txt @@ -43,4 +43,4 @@ testTitle=DDBalance_test testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true
,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"tran
saction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} From 36f84c9cff156f26e373992b217b896c322bff86 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Thu, 21 Jun 2018 16:03:05 -0700 Subject: [PATCH 14/26] Fix uninitialized variable --- fdbserver/storageserver.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 7cb8bcb218..135fb37c35 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -462,7 +462,7 @@ public: updateEagerReads(0), shardChangeCounter(0), fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), - shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), + shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0), logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()), readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")), behind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false), From 678b4494f45638d612427cfb6555ee122582e8ea Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 21 Jun 2018 16:31:52 -0700 Subject: [PATCH 15/26] added logging for the datacenter version difference --- fdbserver/ClusterController.actor.cpp | 9 +++++++-- fdbserver/Knobs.cpp | 2 +- fdbserver/Knobs.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 392835214f..c9ec8c02bb 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1941,6 +1941,7 @@ ACTOR Future updatedChangedDatacenters(ClusterControllerData *self) { } ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *self ) { + double lastLogTime = 0; loop { self->versionDifferenceUpdated = false; if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.usableRegions == 1) { @@ -1977,12 +1978,12 @@ ACTOR Future 
updateDatacenterVersionDifference( ClusterControllerData *sel Void _ = wait(self->db.serverInfo->onChange()); continue; } - + state Future onChange = self->db.serverInfo->onChange(); loop { state Future primaryMetrics = primaryLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() ); state Future remoteMetrics = remoteLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() ); - + Void _ = wait( ( success(primaryMetrics) && success(remoteMetrics) ) || onChange ); if(onChange.isReady()) { break; @@ -1990,6 +1991,10 @@ ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *sel self->versionDifferenceUpdated = true; self->datacenterVersionDifference = primaryMetrics.get().v - remoteMetrics.get().v; + if(now() - lastLogTime > SERVER_KNOBS->CLUSTER_CONTROLLER_LOGGING_DELAY) { + lastLogTime = now(); + TraceEvent("DatacenterVersionDifference", self->id).detail("Difference", self->datacenterVersionDifference); + } Void _ = wait( delay(SERVER_KNOBS->VERSION_LAG_METRIC_INTERVAL) || onChange ); if(onChange.isReady()) { diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 56155ef8fd..8d127b56dc 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -244,7 +244,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( COMMIT_BATCHES_MEM_TO_TOTAL_MEM_SCALE_FACTOR, 10.0 ); // Master Server - init( MASTER_LOGGING_DELAY, 1.0 ); // masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution) // by delay()ing for this amount of time between accepted batches of TransactionRequests. 
init( COMMIT_SLEEP_TIME, 0.0001 ); if( randomize && BUGGIFY ) COMMIT_SLEEP_TIME = 0; @@ -263,6 +262,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( LAST_LIMITED_RATIO, 0.6 ); //Cluster Controller + init( CLUSTER_CONTROLLER_LOGGING_DELAY, 5.0 ); init( MASTER_FAILURE_REACTION_TIME, 0.4 ); if( randomize && BUGGIFY ) MASTER_FAILURE_REACTION_TIME = 10.0; init( MASTER_FAILURE_SLOPE_DURING_RECOVERY, 0.1 ); init( WORKER_COORDINATION_PING_DELAY, 60 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 94671b7747..27125fb8e2 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -188,7 +188,6 @@ public: double PROXY_SPIN_DELAY; // Master Server - double MASTER_LOGGING_DELAY; double COMMIT_SLEEP_TIME; double MIN_BALANCE_TIME; int64_t MIN_BALANCE_DIFFERENCE; @@ -204,6 +203,7 @@ public: int64_t RESOLVER_STATE_MEMORY_LIMIT; //Cluster Controller + double CLUSTER_CONTROLLER_LOGGING_DELAY; double MASTER_FAILURE_REACTION_TIME; double MASTER_FAILURE_SLOPE_DURING_RECOVERY; int WORKER_COORDINATION_PING_DELAY; From 9a91dad5bd106b47e51240b95fd3e32b8e0a38f5 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 21 Jun 2018 16:34:36 -0700 Subject: [PATCH 16/26] fixed compile issue --- fdbserver/ClusterController.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index c9ec8c02bb..868afb26d9 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1941,7 +1941,7 @@ ACTOR Future updatedChangedDatacenters(ClusterControllerData *self) { } ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *self ) { - double lastLogTime = 0; + state double lastLogTime = 0; loop { self->versionDifferenceUpdated = false; if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.usableRegions == 1) { From 23245b02ef951141bdc4fab2fd2259403b5e7b5f Mon Sep 17 00:00:00 2001 
From: Bhaskar Muppana Date: Thu, 21 Jun 2018 16:51:54 -0700 Subject: [PATCH 17/26] Post release steps for 5.2.5 --- packaging/msi/FDBInstaller.wxs | 2 +- versions.target | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index ecf1eea713..6e299b7c6d 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ - 5.2.5 + 5.2.6 5.2 From 8dee95a2824d16d665e8614c99d3463ccc2366f5 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Thu, 21 Jun 2018 17:19:29 -0700 Subject: [PATCH 18/26] My previous `find` line was actually just wrong. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 828a455f2b..715215eb1b 100644 --- a/Makefile +++ b/Makefile @@ -154,7 +154,7 @@ clean: $(CLEAN_TARGETS) docpreview_clean @rm -rf $(DEPSDIR) @rm -rf lib/ @rm -rf bin/coverage.*.xml - @rm -f */*.g.cpp */*/*/g.cpp */*.g.h */*/*.g.h + @find . -name "*.g.cpp" -exec rm -f {} \; -or -name "*.g.h" -exec rm -f {} \; targets: @echo "Available targets:" From 8a8914f046568da7ab34fb8eeb329f0533d51ee8 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 22 Jun 2018 00:04:00 -0700 Subject: [PATCH 19/26] re-added the ability to configure the number of log routers. 
Many log routers are needed to get a sufficient number of sockets involved in copying data across the WAN --- documentation/StatusSchema.json | 1 + fdbcli/fdbcli.actor.cpp | 6 ++++++ fdbclient/DatabaseConfiguration.cpp | 6 +++++- fdbclient/DatabaseConfiguration.h | 1 + fdbclient/ManagementAPI.actor.cpp | 4 ++-- fdbserver/ClusterController.actor.cpp | 9 +++++---- fdbserver/SimulatedCluster.actor.cpp | 1 + fdbserver/TagPartitionedLogSystem.actor.cpp | 2 +- fdbserver/masterserver.actor.cpp | 2 +- tests/fast/SidebandWithStatus.txt | 2 +- tests/rare/LargeApiCorrectnessStatus.txt | 2 +- tests/slow/DDBalanceAndRemoveStatus.txt | 2 +- 12 files changed, 26 insertions(+), 12 deletions(-) diff --git a/documentation/StatusSchema.json b/documentation/StatusSchema.json index 9ebbd6bd7c..075f319885 100644 --- a/documentation/StatusSchema.json +++ b/documentation/StatusSchema.json @@ -362,6 +362,7 @@ "remote_redundancy_mode":"remote_single", "remote_log_replicas":3, "remote_logs":5, + "log_routers":10, "usable_regions":1, "storage_replicas":1, "resolvers":1, diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index a89783b842..7024155100 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -884,6 +884,12 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, if (statusObjConfig.get("logs", intVal)) outputString += format("\n Desired Logs - %d", intVal); + + if (statusObjConfig.get("remote_logs", intVal)) + outputString += format("\n Desired Remote Logs - %d", intVal); + + if (statusObjConfig.get("log_routers", intVal)) + outputString += format("\n Desired Log Routers - %d", intVal); } catch (std::runtime_error& e) { outputString = outputStringCache; diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index 13bc44f442..05e2ef0b20 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -29,7 +29,7 @@ DatabaseConfiguration::DatabaseConfiguration() void 
DatabaseConfiguration::resetInternal() { // does NOT reset rawConfiguration initialized = false; - masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = -1; + masterProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = desiredLogRouterCount = -1; tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END; autoMasterProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES; autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS; @@ -297,6 +297,9 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if( resolverCount != -1 ) { result["resolvers"] = resolverCount; } + if( desiredLogRouterCount != -1 ) { + result["log_routers"] = desiredLogRouterCount; + } if( remoteDesiredTLogCount != -1 ) { result["remote_logs"] = remoteDesiredTLogCount; } @@ -336,6 +339,7 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { else if (ck == LiteralStringRef("auto_logs")) parse(&autoDesiredTLogCount, value); else if (ck == LiteralStringRef("storage_replication_policy")) parseReplicationPolicy(&storagePolicy, value); else if (ck == LiteralStringRef("log_replication_policy")) parseReplicationPolicy(&tLogPolicy, value); + else if (ck == LiteralStringRef("log_routers")) parse(&desiredLogRouterCount, value); else if (ck == LiteralStringRef("remote_logs")) parse(&remoteDesiredTLogCount, value); else if (ck == LiteralStringRef("remote_log_replicas")) parse(&remoteTLogReplicationFactor, value); else if (ck == LiteralStringRef("remote_log_policy")) parseReplicationPolicy(&remoteTLogPolicy, value); diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 4c16b41ad5..e9f09a463f 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -159,6 +159,7 @@ struct DatabaseConfiguration { KeyValueStoreType storageServerStoreType; // Remote TLogs + int32_t desiredLogRouterCount; 
int32_t remoteDesiredTLogCount; int32_t remoteTLogReplicationFactor; IRepPolicyRef remoteTLogPolicy; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 5bda5e37c0..d990be2a0a 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -65,14 +65,14 @@ std::map configForToken( std::string const& mode ) { std::string key = mode.substr(0, pos); std::string value = mode.substr(pos+1); - if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) { + if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "log_routers" || key == "satellite_logs" || key == "usable_regions") && isInteger(value) ) { out[p+key] = value; } if( key == "regions" ) { json_spirit::mValue mv; json_spirit::read_string( value, mv ); - + StatusObject regionObj; regionObj["regions"] = mv; out[p+key] = BinaryWriter::toValue(regionObj, IncludeVersion()).toString(); diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 868afb26d9..ce51234fbd 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -459,7 +459,6 @@ public: std::set> remoteDC; remoteDC.insert(req.dcId); - auto remoteLogs = getWorkersForTlogs( req.configuration, req.configuration.getRemoteTLogReplicationFactor(), req.configuration.getDesiredRemoteLogs(), req.configuration.getRemoteTLogPolicy(), id_used, false, remoteDC ); for(int i = 0; i < remoteLogs.size(); i++) { @@ -895,13 +894,15 @@ public: if(oldRemoteTLogFit < newRemoteTLogFit) return false; + int oldRouterCount = oldTLogFit.count * std::max(1, db.config.desiredLogRouterCount / std::max(1,oldTLogFit.count)); + int newRouterCount = newTLogFit.count * std::max(1, db.config.desiredLogRouterCount / std::max(1,newTLogFit.count)); RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter); - 
RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newTLogFit.count, db.config, id_used, Optional(), true ) : log_routers, ProcessClass::LogRouter); + RoleFitness newLogRoutersFit((db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::REMOTE_RECOVERED) ? getWorkersForRoleInDatacenter( *remoteDC.begin(), ProcessClass::LogRouter, newRouterCount, db.config, id_used, Optional(), true ) : log_routers, ProcessClass::LogRouter); - if(oldLogRoutersFit.count < oldTLogFit.count) { + if(oldLogRoutersFit.count < oldRouterCount) { oldLogRoutersFit.worstFit = ProcessClass::NeverAssign; } - if(newLogRoutersFit.count < newTLogFit.count) { + if(newLogRoutersFit.count < newRouterCount) { newLogRoutersFit.worstFit = ProcessClass::NeverAssign; } diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 5b5074b4f5..88c45287a0 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -863,6 +863,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { ASSERT(false); // Programmer forgot to adjust cases. 
} + if (g_random->random01() < 0.25) db.desiredLogRouterCount = g_random->randomInt(1,7); if (g_random->random01() < 0.25) db.remoteDesiredTLogCount = g_random->randomInt(1,7); } diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 38ffd09407..b3eca87976 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -1600,7 +1600,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedrecruitmentID = logSystem->recruitmentID; if(configuration.usableRegions > 1) { - logSystem->logRouterTags = recr.tLogs.size(); + logSystem->logRouterTags = recr.tLogs.size() * std::max(1, configuration.desiredLogRouterCount / std::max(1,recr.tLogs.size())); logSystem->expectedLogSets++; } else { logSystem->logRouterTags = 0; diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index ce1a68ef29..9570f69955 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -311,7 +311,7 @@ ACTOR Future newTLogServers( Reference self, RecruitFromConfig self->dcId_locality[remoteDcId] = loc; } - Future fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() ) ) ); + Future fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() * std::max(1, self->configuration.desiredLogRouterCount / std::max(1, recr.tLogs.size())) ) ) ); Reference newLogSystem = wait( oldLogSystem->newEpoch( recr, fRemoteWorkers, self->configuration, self->cstate.myDBState.recoveryCount + 1, self->dcId_locality[recr.dcId], self->dcId_locality[remoteDcId], self->allTags, self->recruitmentStalled ) ); self->logSystem = newLogSystem; diff --git a/tests/fast/SidebandWithStatus.txt 
b/tests/fast/SidebandWithStatus.txt index 94f4616d9f..8bca27d0b4 100644 --- a/tests/fast/SidebandWithStatus.txt +++ b/tests/fast/SidebandWithStatus.txt @@ -5,7 +5,7 @@ testTitle=CloggedCausalConsistencyTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true
,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_version_lag":12341234,"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","log_routers":10,"storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_s
econds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} testName=RandomClogging testDuration=30.0 diff --git a/tests/rare/LargeApiCorrectnessStatus.txt b/tests/rare/LargeApiCorrectnessStatus.txt index 7db5388026..4826de87f3 100644 --- a/tests/rare/LargeApiCorrectnessStatus.txt +++ b/tests/rare/LargeApiCorrectnessStatus.txt @@ -24,4 +24,4 @@ testTitle=ApiCorrectnessTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true
,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_version_lag":12341234,"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","log_routers":10,"storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_s
econds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} diff --git a/tests/slow/DDBalanceAndRemoveStatus.txt b/tests/slow/DDBalanceAndRemoveStatus.txt index 7e07285c3c..17cb5900b4 100644 --- a/tests/slow/DDBalanceAndRemoveStatus.txt +++ b/tests/slow/DDBalanceAndRemoveStatus.txt @@ -43,4 +43,4 @@ testTitle=DDBalance_test testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"unused_allocated_memory":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error 
messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2,"remote_log_replication_factor":3,"remote_log_fault_tolerance":2,"satellite_log_replication_factor":3,"satellite_log_write_anti_quorum":0,"satellite_log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the 
workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"datacenter_version_difference":0,"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","full_replication":true,"configuration":{"log_anti_quorum":0,"log_replicas":2,"log_replication_policy":"(zoneid^3x1)","redundancy_mode":"single","regions":[{"datacenters":[{"id":"mr","priority":1,"satellite":1}],"satellite_redundancy_mode":"one_satellite_single","satellite_log_replicas":1,"satellite_usable_dcs":1,"satellite_anti_quorum":0,"satellite_log_policy":"(zoneid^3x1)","satellite_logs":2}],"remote_redundancy_mode":"remote_single","remote_log_replicas":3,"remote_logs":5,"usable_regions":1,"storage_replicas":1,"resolvers":1,"storage_replication_policy":"(zoneid^3x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"auto_resolvers":1,"auto_logs":3,"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true
,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error 
description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_version_lag":12341234,"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the 
workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. 
Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","log_routers":10,"storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"usable_regions":1,"redundancy_mode":"single","auto_resolvers":1},"latency_probe":{"immediate_priority_transaction_start_s
econds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} From 5fc8199abc8c3436122c69ecc1743e236cae4758 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 22 Jun 2018 10:15:24 -0700 Subject: [PATCH 20/26] Swapped OkayFit and UnsetFit, because generally if machine classes are set on one machine they are set everywhere and it helps with wait_for_good_recruitment logic wait_for_good_recruitment now requires that you have the desired count of each role remote recruitment is given a much longer wait_for_good_recruitment time interval, which does not start until enough remote machines 
have registered --- fdbrpc/Locality.h | 2 +- fdbserver/ClusterController.actor.cpp | 34 ++++++++++++++++++--------- fdbserver/Knobs.cpp | 13 +++++----- fdbserver/Knobs.h | 1 + fdbserver/LeaderElection.actor.cpp | 5 +++- 5 files changed, 36 insertions(+), 19 deletions(-) diff --git a/fdbrpc/Locality.h b/fdbrpc/Locality.h index 19b4eb24b6..7b404413e9 100644 --- a/fdbrpc/Locality.h +++ b/fdbrpc/Locality.h @@ -27,7 +27,7 @@ struct ProcessClass { // This enum is stored in restartInfo.ini for upgrade tests, so be very careful about changing the existing items! enum ClassType { UnsetClass, StorageClass, TransactionClass, ResolutionClass, TesterClass, ProxyClass, MasterClass, StatelessClass, LogClass, ClusterControllerClass, LogRouterClass, InvalidClass = -1 }; - enum Fitness { BestFit, GoodFit, OkayFit, UnsetFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask + enum Fitness { BestFit, GoodFit, UnsetFit, OkayFit, WorstFit, ExcludeFit, NeverAssign }; //cannot be larger than 7 because of leader election mask enum ClusterRole { Storage, TLog, Proxy, Master, Resolver, LogRouter, ClusterController }; enum ClassSource { CommandLineSource, AutoSource, DBSource, InvalidSource = -1 }; int16_t _class; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index ce51234fbd..9a46f2430c 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -439,6 +439,13 @@ public: return false; } + bool betterCount (RoleFitness const& r) const { + if(count != r.count) return count > r.count; + if (worstFit != r.worstFit) return worstFit < r.worstFit; + if (bestFit != r.bestFit) return bestFit < r.bestFit; + return false; + } + bool operator == (RoleFitness const& r) const { return worstFit == r.worstFit && bestFit == r.bestFit && count == r.count; } }; @@ -470,9 +477,13 @@ public: result.logRouters.push_back(logRouters[i].first); } - if( now() - startTime < 
SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && - ( ( RoleFitness(remoteLogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) ) || - ( RoleFitness(logRouters, ProcessClass::LogRouter) > RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount) ) ) ) { + if(!remoteStartTime.present()) { + remoteStartTime = now(); + } + + if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && + ( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) || + ( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) { throw operation_failed(); } @@ -561,10 +572,10 @@ public: } if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && - ( RoleFitness(tlogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) || - ( region.satelliteTLogReplicationFactor > 0 && RoleFitness(satelliteLogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId)) ) || - RoleFitness(proxies, ProcessClass::Proxy) > RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies()) || - RoleFitness(resolvers, ProcessClass::Resolver) > RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers()) ) ) { + ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || + ( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId)).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) || + RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, 
req.configuration.getDesiredProxies()).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) || + RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers()).betterCount(RoleFitness(resolvers, ProcessClass::Resolver)) ) ) { return operation_failed(); } @@ -592,7 +603,7 @@ public: } throw no_more_servers(); } catch( Error& e ) { - if (e.code() != error_code_no_more_servers || regions[1].priority < 0 || now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { + if (e.code() != error_code_no_more_servers || regions[1].priority < 0 || now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { throw; } TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e); @@ -702,8 +713,8 @@ public: .detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size()); if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && - ( RoleFitness(tlogs, ProcessClass::TLog) > RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()) || - bestFitness > RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers()) ) ) { + ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || + RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers()).betterCount(bestFitness) ) ) { throw operation_failed(); } @@ -959,6 +970,7 @@ public: DBInfo db; Database cx; double startTime; + Optional remoteStartTime; Version datacenterVersionDifference; bool 
versionDifferenceUpdated; @@ -1482,7 +1494,7 @@ ACTOR Future clusterRecruitRemoteFromConfiguration( ClusterControllerData* req.reply.send( self->findRemoteWorkersForConfiguration( req ) ); return Void(); } catch (Error& e) { - if (e.code() == error_code_no_more_servers && now() - self->startTime >= SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { + if (e.code() == error_code_no_more_servers && self->remoteStartTime.present() && now() - self->remoteStartTime.get() >= SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { self->outstandingRemoteRecruitmentRequests.push_back( req ); TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e); return Void(); diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 8d127b56dc..7bb1065e5d 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -270,7 +270,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( SHUTDOWN_TIMEOUT, 600 ); if( randomize && BUGGIFY ) SHUTDOWN_TIMEOUT = 60.0; init( MASTER_SPIN_DELAY, 1.0 ); if( randomize && BUGGIFY ) MASTER_SPIN_DELAY = 10.0; init( CC_CHANGE_DELAY, 0.1 ); - init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 0.1 ); + init( WAIT_FOR_GOOD_RECRUITMENT_DELAY, 1.0 ); + init( WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY, 5.0 ); init( ATTEMPT_RECRUITMENT_DELAY, 0.035 ); init( WORKER_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) WORKER_FAILURE_TIME = 10.0; init( CHECK_BETTER_MASTER_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) CHECK_BETTER_MASTER_INTERVAL = 0.001; @@ -278,11 +279,11 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( MAX_VERSION_DIFFERENCE, 20 * VERSIONS_PER_SECOND ); init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0; - init( EXPECTED_MASTER_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_TLOG_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::GoodFit ); - init( EXPECTED_PROXY_FITNESS, 
ProcessClass::GoodFit ); - init( EXPECTED_RESOLVER_FITNESS, ProcessClass::GoodFit ); + init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit ); + init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit ); init( RECRUITMENT_TIMEOUT, 600 ); if( randomize && BUGGIFY ) RECRUITMENT_TIMEOUT = g_random->coinflip() ? 60.0 : 1.0; init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20; diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 27125fb8e2..62a07c1c99 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -212,6 +212,7 @@ public: double MASTER_SPIN_DELAY; double CC_CHANGE_DELAY; double WAIT_FOR_GOOD_RECRUITMENT_DELAY; + double WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY; double ATTEMPT_RECRUITMENT_DELAY; double WORKER_FAILURE_TIME; double CHECK_BETTER_MASTER_INTERVAL; diff --git a/fdbserver/LeaderElection.actor.cpp b/fdbserver/LeaderElection.actor.cpp index b7b9a78e41..174d4819aa 100644 --- a/fdbserver/LeaderElection.actor.cpp +++ b/fdbserver/LeaderElection.actor.cpp @@ -82,7 +82,10 @@ ACTOR Future tryBecomeLeaderInternal( ServerCoordinators coordinators, Val state bool iAmLeader = false; state UID prevChangeID; - if( asyncPriorityInfo->get().processClassFitness > ProcessClass::UnsetFit || asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessBad || asyncPriorityInfo->get().isExcluded ) { + + if(asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessBad || asyncPriorityInfo->get().dcFitness == ClusterControllerPriorityInfo::FitnessRemote || asyncPriorityInfo->get().isExcluded) { + Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) ); + } else if( asyncPriorityInfo->get().processClassFitness > ProcessClass::UnsetFit ) { Void _ = wait( delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) ); } 
From 96b0a91ab28388569b307ba253cc87fb3b149776 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 22 Jun 2018 10:38:36 -0700 Subject: [PATCH 21/26] simplified betterCount logic --- fdbserver/ClusterController.actor.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 9a46f2430c..7475b8213a 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -440,10 +440,8 @@ public: } bool betterCount (RoleFitness const& r) const { - if(count != r.count) return count > r.count; - if (worstFit != r.worstFit) return worstFit < r.worstFit; - if (bestFit != r.bestFit) return bestFit < r.bestFit; - return false; + if(count > r.count) return true; + return worstFit < r.worstFit; } bool operator == (RoleFitness const& r) const { return worstFit == r.worstFit && bestFit == r.bestFit && count == r.count; } From 398497f5c3253d0f0b98381fb22ba9eea09b17c7 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 22 Jun 2018 12:24:01 -0700 Subject: [PATCH 22/26] fix: wrong desired count used when checking good remote fitness --- fdbserver/ClusterController.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 7475b8213a..3fbcd52280 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -480,7 +480,7 @@ public: } if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && - ( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs()).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) || + ( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs()).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) || ( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, 
req.logRouterCount).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) { throw operation_failed(); } From 1bd1d9562a42e55142cd22e9d755e2f40106746a Mon Sep 17 00:00:00 2001 From: xmeng Date: Sun, 24 Jun 2018 21:54:23 +0100 Subject: [PATCH 23/26] Fix indentation --- recipes/java-recipes/MicroQueue.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/recipes/java-recipes/MicroQueue.java b/recipes/java-recipes/MicroQueue.java index 4b0e0fbc71..09f8ebe1a4 100644 --- a/recipes/java-recipes/MicroQueue.java +++ b/recipes/java-recipes/MicroQueue.java @@ -47,14 +47,14 @@ public class MicroQueue { // Remove from the top of the queue. return tcx.run(new Function(){ public Void apply(Transaction tr){ - final KeyValue item = firstItem(tr); - if(item == null){ - return null; - } + final KeyValue item = firstItem(tr); + if(item == null){ + return null; + } tr.clear(item.getKey()); - // Return the old value. - return Tuple.fromBytes(item.getValue()).get(0); + // Return the old value. 
+ return Tuple.fromBytes(item.getValue()).get(0); } }); From 2ec8744ab38300eded5ec2673826916e3df07556 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 25 Jun 2018 11:15:49 -0700 Subject: [PATCH 24/26] fix: parallel get more needs to verify the begin version matches the end of the previous request, because when a peek cursor expires we lose all history, so the same sequence number could start at different versions --- fdbserver/LogSystemPeekCursor.actor.cpp | 5 +++++ fdbserver/OldTLogServer.actor.cpp | 1 + fdbserver/TLogInterface.h | 3 ++- fdbserver/TLogServer.actor.cpp | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fdbserver/LogSystemPeekCursor.actor.cpp b/fdbserver/LogSystemPeekCursor.actor.cpp index 18c3c4fec9..8fb40ecf46 100644 --- a/fdbserver/LogSystemPeekCursor.actor.cpp +++ b/fdbserver/LogSystemPeekCursor.actor.cpp @@ -143,6 +143,7 @@ ACTOR Future serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self } loop { + state Version expectedBegin = self->messageVersion.version; try { while(self->futureResults.size() < SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS && self->interf->get().present()) { self->futureResults.push_back( brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked, std::make_pair(self->randomID, self->sequence++)), taskID) ) ); @@ -150,6 +151,10 @@ ACTOR Future serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self choose { when( TLogPeekReply res = wait( self->interf->get().present() ? 
self->futureResults.front() : Never() ) ) { + if(res.begin.get() != expectedBegin) { + throw timed_out(); + } + expectedBegin = res.end; self->futureResults.pop_front(); self->results = res; if(res.popped.present()) diff --git a/fdbserver/OldTLogServer.actor.cpp b/fdbserver/OldTLogServer.actor.cpp index 7aa9e53a7e..cb19a31118 100644 --- a/fdbserver/OldTLogServer.actor.cpp +++ b/fdbserver/OldTLogServer.actor.cpp @@ -975,6 +975,7 @@ namespace oldTLog { } else { sequenceData.send(reply.end); } + reply.begin = req.begin; } req.reply.send( reply ); diff --git a/fdbserver/TLogInterface.h b/fdbserver/TLogInterface.h index 4918c669b5..805cc5f111 100644 --- a/fdbserver/TLogInterface.h +++ b/fdbserver/TLogInterface.h @@ -140,10 +140,11 @@ struct TLogPeekReply { Optional popped; Version maxKnownVersion; Version minKnownCommittedVersion; + Optional begin; template void serialize(Ar& ar) { - ar & arena & messages & end & popped & maxKnownVersion & minKnownCommittedVersion; + ar & arena & messages & end & popped & maxKnownVersion & minKnownCommittedVersion & begin; } }; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 2f9db97a65..b1b76daf1c 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1007,6 +1007,7 @@ ACTOR Future tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere } else { sequenceData.send(rep.end); } + rep.begin = req.begin; } req.reply.send( rep ); @@ -1068,6 +1069,7 @@ ACTOR Future tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere } else { sequenceData.send(reply.end); } + reply.begin = req.begin; } req.reply.send( reply ); From 1a8dac365d3468951910eb22938f6d76c29f9ede Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 25 Jun 2018 15:57:11 -0700 Subject: [PATCH 25/26] fix: poppedAllAfter was not set to a large enough version --- fdbserver/storageserver.actor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbserver/storageserver.actor.cpp 
b/fdbserver/storageserver.actor.cpp index 67827cbb4c..2c67fa6e74 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -3183,8 +3183,8 @@ ACTOR Future storageServerCore( StorageServer* self, StorageServerInterfac if( self->db->get().recoveryState >= RecoveryState::FULLY_RECOVERED ) { self->logSystem = ILogSystem::fromServerDBInfo( self->thisServerID, self->db->get() ); if (self->logSystem) { - if(self->logSystem->getLogSystemConfig().oldTLogs.size()) { - self->poppedAllAfter = self->logSystem->getLogSystemConfig().oldTLogs[0].epochEnd; + if(self->db->get().logSystemConfig.previousEpochEndVersion.present()) { + self->poppedAllAfter = self->db->get().logSystemConfig.previousEpochEndVersion.get(); } self->logCursor = self->logSystem->peekSingle( self->thisServerID, self->version.get() + 1, self->tag, self->history ); self->popVersion( self->durableVersion.get() + 1, true ); From c6313a79e3680a6de78e12a61eeea9a19ec49ead Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 25 Jun 2018 18:20:16 -0700 Subject: [PATCH 26/26] fix: the cluster controller needs to continue to retry recruitment until after wait_for_good_remote_recruitment_delay --- fdbserver/ClusterController.actor.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 3fbcd52280..a5af4f162a 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -601,7 +601,11 @@ public: } throw no_more_servers(); } catch( Error& e ) { - if (e.code() != error_code_no_more_servers || regions[1].priority < 0 || now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { + if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { + throw operation_failed(); + } + + if (e.code() != error_code_no_more_servers || regions[1].priority < 0) { throw; } TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", 
id).error(e);