From 35bef73a1c9fd871dbed982ba78296f7309b588a Mon Sep 17 00:00:00 2001 From: Young Liu Date: Thu, 10 Sep 2020 17:44:15 -0700 Subject: [PATCH 1/3] Rename proxy to commit proxy --- contrib/commit_debug.py | 16 +- design/backup_v2_partitioned_logs.md | 8 +- .../sphinx/source/api-error-codes.rst | 2 +- .../sphinx/source/disk-snapshot-backup.rst | 2 +- .../source/mr-status-json-schemas.rst.inc | 10 +- fdbcli/fdbcli.actor.cpp | 63 +++++--- fdbclient/CMakeLists.txt | 2 +- fdbclient/ClientWorkerInterface.h | 2 +- fdbclient/ClusterInterface.h | 2 +- ...roxyInterface.h => CommitProxyInterface.h} | 19 +-- fdbclient/CoordinationInterface.h | 2 +- fdbclient/DatabaseConfiguration.cpp | 69 ++++----- fdbclient/DatabaseConfiguration.h | 11 +- fdbclient/DatabaseContext.h | 10 +- fdbclient/GrvProxyInterface.h | 2 + fdbclient/Knobs.cpp | 4 +- fdbclient/Knobs.h | 4 +- fdbclient/ManagementAPI.actor.cpp | 29 ++-- fdbclient/ManagementAPI.actor.h | 15 +- fdbclient/MonitorLeader.actor.cpp | 45 +++--- fdbclient/MonitorLeader.h | 7 +- fdbclient/MutationList.h | 3 +- fdbclient/NativeAPI.actor.cpp | 97 ++++++------ fdbclient/NativeAPI.actor.h | 2 +- fdbclient/Schemas.cpp | 14 +- fdbclient/TagThrottle.actor.cpp | 2 +- fdbrpc/Locality.cpp | 8 +- fdbrpc/Locality.h | 13 +- fdbrpc/simulator.h | 3 +- fdbserver/BackupWorker.actor.cpp | 2 +- fdbserver/CMakeLists.txt | 2 +- fdbserver/ClusterController.actor.cpp | 85 +++++------ ....actor.cpp => CommitProxyServer.actor.cpp} | 142 ++++++++---------- fdbserver/GrvProxyServer.actor.cpp | 6 +- fdbserver/Knobs.cpp | 4 +- fdbserver/Knobs.h | 7 +- fdbserver/Ratekeeper.actor.cpp | 68 +++++---- fdbserver/Resolver.actor.cpp | 4 +- fdbserver/SimulatedCluster.actor.cpp | 4 +- fdbserver/Status.actor.cpp | 84 ++++++----- fdbserver/WorkerInterface.actor.h | 29 ++-- fdbserver/fdbserver.actor.cpp | 9 +- fdbserver/masterserver.actor.cpp | 109 +++++++------- fdbserver/storageserver.actor.cpp | 10 +- fdbserver/worker.actor.cpp | 23 +-- 
.../workloads/ConsistencyCheck.actor.cpp | 39 +++-- fdbserver/workloads/Rollback.actor.cpp | 10 +- fdbserver/workloads/TargetedKill.actor.cpp | 20 +-- flow/error_definitions.h | 6 +- tests/status/invalid_proc_addresses.json | 6 +- .../local_6_machine_no_replicas_remain.json | 6 +- .../separate_2_of_3_coordinators_remain.json | 4 +- .../separate_cannot_write_cluster_file.json | 6 +- tests/status/separate_idle.json | 2 +- tests/status/separate_initializing.json | 2 +- tests/status/separate_no_database.json | 2 +- tests/status/separate_not_enough_servers.json | 4 +- ...single_process_too_many_config_params.json | 2 +- 58 files changed, 598 insertions(+), 565 deletions(-) rename fdbclient/{MasterProxyInterface.h => CommitProxyInterface.h} (96%) rename fdbserver/{MasterProxyServer.actor.cpp => CommitProxyServer.actor.cpp} (95%) diff --git a/contrib/commit_debug.py b/contrib/commit_debug.py index 7f6de3ff91..b37b5260d0 100755 --- a/contrib/commit_debug.py +++ b/contrib/commit_debug.py @@ -24,22 +24,22 @@ def parse_args(): # (e)nd of a span with a better given name locationToPhase = { "NativeAPI.commit.Before": [], - "MasterProxyServer.batcher": [("b", "Commit")], - "MasterProxyServer.commitBatch.Before": [], - "MasterProxyServer.commitBatch.GettingCommitVersion": [("b", "CommitVersion")], - "MasterProxyServer.commitBatch.GotCommitVersion": [("e", "CommitVersion")], + "CommitProxyServer.batcher": [("b", "Commit")], + "CommitProxyServer.commitBatch.Before": [], + "CommitProxyServer.commitBatch.GettingCommitVersion": [("b", "CommitVersion")], + "CommitProxyServer.commitBatch.GotCommitVersion": [("e", "CommitVersion")], "Resolver.resolveBatch.Before": [("b", "Resolver.PipelineWait")], "Resolver.resolveBatch.AfterQueueSizeCheck": [], "Resolver.resolveBatch.AfterOrderer": [("e", "Resolver.PipelineWait"), ("b", "Resolver.Conflicts")], "Resolver.resolveBatch.After": [("e", "Resolver.Conflicts")], - "MasterProxyServer.commitBatch.AfterResolution": [("b", "Proxy.Processing")], - 
"MasterProxyServer.commitBatch.ProcessingMutations": [], - "MasterProxyServer.commitBatch.AfterStoreCommits": [("e", "Proxy.Processing")], + "CommitProxyServer.commitBatch.AfterResolution": [("b", "Proxy.Processing")], + "CommitProxyServer.commitBatch.ProcessingMutations": [], + "CommitProxyServer.commitBatch.AfterStoreCommits": [("e", "Proxy.Processing")], "TLog.tLogCommit.BeforeWaitForVersion": [("b", "TLog.PipelineWait")], "TLog.tLogCommit.Before": [("e", "TLog.PipelineWait")], "TLog.tLogCommit.AfterTLogCommit": [("b", "TLog.FSync")], "TLog.tLogCommit.After": [("e", "TLog.FSync")], - "MasterProxyServer.commitBatch.AfterLogPush": [("e", "Commit")], + "CommitProxyServer.commitBatch.AfterLogPush": [("e", "Commit")], "NativeAPI.commit.After": [], } diff --git a/design/backup_v2_partitioned_logs.md b/design/backup_v2_partitioned_logs.md index 18369cdd6f..97526f5f89 100644 --- a/design/backup_v2_partitioned_logs.md +++ b/design/backup_v2_partitioned_logs.md @@ -16,7 +16,7 @@ As an essential component of a database system, backup and restore is commonly u ## Background -FDB backup system continuously scan the database’s key-value space, save key-value pairs and mutations at versions into range files and log files in blob storage. Specifically, mutation logs are generated at Proxy, and are written to transaction logs along with regular mutations. In production clusters like CK clusters, backup system is always on, which means each mutation is written twice to transaction logs, consuming about half of write bandwidth and about 40% of Proxy CPU time. +FDB backup system continuously scan the database’s key-value space, save key-value pairs and mutations at versions into range files and log files in blob storage. Specifically, mutation logs are generated at CommitProxy, and are written to transaction logs along with regular mutations. 
In production clusters like CK clusters, backup system is always on, which means each mutation is written twice to transaction logs, consuming about half of write bandwidth and about 40% of CommitProxy CPU time. The design of old backup system is [here](https://github.com/apple/foundationdb/blob/master/design/backup.md), and the data format of range files and mutations files is [here](https://github.com/apple/foundationdb/blob/master/design/backup-dataFormat.md). The technical overview of FDB is [here](https://github.com/apple/foundationdb/wiki/Technical-Overview-of-the-Database). The FDB recovery is described in this [doc](https://github.com/apple/foundationdb/blob/master/design/recovery-internals.md). @@ -37,7 +37,7 @@ The design of old backup system is [here](https://github.com/apple/foundationdb/ Feature priorities: Feature 1, 2, 3, 4, 5 are must-have; Feature 6 is better to have. -1. **Write bandwidth reduction by half**: removes the requirement to generate backup mutations at the Proxy, thus reduce TLog write bandwidth usage by half and significantly improve Proxy CPU usage; +1. **Write bandwidth reduction by half**: removes the requirement to generate backup mutations at the CommitProxy, thus reduce TLog write bandwidth usage by half and significantly improve CommitProxy CPU usage; 2. **Correctness**: The restored database must be consistent: each *restored* state (i.e., key-value pair) at a version `v` must match the original state at version `v`. 3. **Performance**: The backup system should be performant, mostly measured as a small CPU overhead on transaction logs and backup workers. The version lag on backup workers is an indicator of performance. 4. **Fault-tolerant**: The backup system should be fault-tolerant to node failures in the FDB cluster. 
@@ -153,9 +153,9 @@ The requirement of the new backup system raises several design challenges: **Master**: The master is responsible for coordinating the transition of the FDB transaction sub-system from one generation to the next. In particular, the master recruits backup workers during the recovery. -**Transaction Logs (TLogs)**: The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the proxy in version order, and only respond to the proxy once the data has been written and fsync'ed to an append only mutation log on disk. Storage servers retrieve mutations from TLogs. Once the storage servers have persisted mutations, storage servers then pop the mutations from the TLogs. +**Transaction Logs (TLogs)**: The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the commit proxy in version order, and only respond to the commit proxy once the data has been written and fsync'ed to an append only mutation log on disk. Storage servers retrieve mutations from TLogs. Once the storage servers have persisted mutations, storage servers then pop the mutations from the TLogs. -**Proxy**: The proxies are responsible for committing transactions, and tracking the storage servers responsible for each range of keys. In the old backup system, Proxies are responsible to group mutations into backup mutations and write them to the database. +**CommitProxy**: The commit proxies are responsible for committing transactions, and tracking the storage servers responsible for each range of keys. In the old backup system, commit proxies are responsible for grouping mutations into backup mutations and writing them to the database. **GrvProxy**: The GRV proxies are responsible for providing read versions.
## System overview diff --git a/documentation/sphinx/source/api-error-codes.rst b/documentation/sphinx/source/api-error-codes.rst index f013f4aabd..48c1c215a6 100644 --- a/documentation/sphinx/source/api-error-codes.rst +++ b/documentation/sphinx/source/api-error-codes.rst @@ -40,7 +40,7 @@ FoundationDB may return the following error codes from API functions. If you nee +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | external_client_already_loaded | 1040| External client has already been loaded | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ -| proxy_memory_limit_exceeded | 1042| Proxy commit memory limit exceeded | +| proxy_memory_limit_exceeded | 1042| CommitProxy commit memory limit exceeded | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | batch_transaction_throttled | 1051| Batch GRV request rate limit exceeded | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ diff --git a/documentation/sphinx/source/disk-snapshot-backup.rst b/documentation/sphinx/source/disk-snapshot-backup.rst index e5eccd8051..33b97b8c09 100644 --- a/documentation/sphinx/source/disk-snapshot-backup.rst +++ b/documentation/sphinx/source/disk-snapshot-backup.rst @@ -104,7 +104,7 @@ Field Name Description ``Name for the snapshot file`` recommended name for the disk snapshot cluster-name:ip-addr:port:UID ================================ ======================================================== ======================================================== -``snapshot create binary`` will not be invoked on processes which does not have any persistent data (for example, Cluster Controller or Master or MasterProxy). 
Since these processes are stateless, there is no need for a snapshot. Any specialized configuration knobs used for one of these stateless processes need to be copied and restored externally. +``snapshot create binary`` will not be invoked on processes which does not have any persistent data (for example, Cluster Controller or Master or CommitProxy). Since these processes are stateless, there is no need for a snapshot. Any specialized configuration knobs used for one of these stateless processes need to be copied and restored externally. Management of disk snapshots ---------------------------- diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index 0f4b6a9aa9..d7af4a0885 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -27,7 +27,7 @@ "storage", "transaction", "resolution", - "proxy", + "commit_proxy", "grv_proxy", "master", "test", @@ -61,7 +61,7 @@ "role":{ "$enum":[ "master", - "proxy", + "commit_proxy", "grv_proxy", "log", "storage", @@ -447,7 +447,7 @@ ], "recovery_state":{ "required_resolvers":1, - "required_proxies":1, + "required_commit_proxies":1, "required_grv_proxies":1, "name":{ // "fully_recovered" is the healthy state; other states are normal to transition through but not to persist in "$enum":[ @@ -633,11 +633,11 @@ "address":"10.0.4.1" } ], - "auto_proxies":3, + "auto_commit_proxies":3, "auto_resolvers":1, "auto_logs":3, "backup_worker_enabled":1, - "proxies":5 // this field will be absent if a value has not been explicitly set + "commit_proxies":5 // this field will be absent if a value has not been explicitly set }, "data":{ "least_operating_space_bytes_log_server":0, diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index 6351219341..a304daa2ad 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -470,7 +470,8 @@ void initHelp() { "All keys 
between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK); helpMap["configure"] = CommandHelp( "configure [new] " - "|grv_" + "|grv_" "proxies=|logs=|resolvers=>*", "change the database configuration", "The `new' option, if present, initializes a new database with the given configuration rather than changing " @@ -479,10 +480,13 @@ void initHelp() { "of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - " "See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage " "engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small " - "datasets.\n\nproxies=: Sets the desired number of proxies in the cluster. Must be at least 1, or set " - "to -1 which restores the number of proxies to the default value.\n\ngrv_proxies=: Sets the " + "datasets.\n\ncommit_proxies=: Sets the desired number of commit proxies in the cluster. Must " + "be at least 1, or set " + "to -1 which restores the number of commit proxies to the default value.\n\ngrv_proxies=: Sets " + "the " "desired number of GRV proxies in the cluster. Must be at least 1, or set to -1 which restores the number of " - "proxies to the default value.\n\nlogs=: Sets the desired number of log servers in the cluster. Must be " + "GRV proxies to the default value.\n\nlogs=: Sets the desired number of log servers in the cluster. Must " + "be " "at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=: " "Sets the desired number of resolvers in the cluster. 
Must be at least 1, or set to -1 which restores the " "number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information."); @@ -871,12 +875,13 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, fatalRecoveryState = true; if (name == "recruiting_transaction_servers") { - description += format("\nNeed at least %d log servers across unique zones, %d proxies, " - "%d GRV proxies and %d resolvers.", - recoveryState["required_logs"].get_int(), - recoveryState["required_proxies"].get_int(), - recoveryState["required_grv_proxies"].get_int(), - recoveryState["required_resolvers"].get_int()); + description += + format("\nNeed at least %d log servers across unique zones, %d commit proxies, " + "%d GRV proxies and %d resolvers.", + recoveryState["required_logs"].get_int(), + recoveryState["required_commit_proxies"].get_int(), + recoveryState["required_grv_proxies"].get_int(), + recoveryState["required_resolvers"].get_int()); if (statusObjCluster.has("machines") && statusObjCluster.has("processes")) { auto numOfNonExcludedProcessesAndZones = getNumOfNonExcludedProcessAndZones(statusObjCluster); description += format("\nHave %d non-excluded processes on %d machines across %d zones.", numOfNonExcludedProcessesAndZones.first, getNumofNonExcludedMachines(statusObjCluster), numOfNonExcludedProcessesAndZones.second); @@ -1026,8 +1031,8 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, outputString += format("\n Exclusions - %d (type `exclude' for details)", excludedServersArr.size()); } - if (statusObjConfig.get("proxies", intVal)) - outputString += format("\n Desired Proxies - %d", intVal); + if (statusObjConfig.get("commit_proxies", intVal)) + outputString += format("\n Desired Commit Proxies - %d", intVal); if (statusObjConfig.get("grv_proxies", intVal)) outputString += format("\n Desired GRV Proxies - %d", intVal); @@ -1790,14 +1795,14 @@ ACTOR Future configure( Database 
db, std::vector tokens, Refere bool noChanges = conf.get().old_replication == conf.get().auto_replication && conf.get().old_logs == conf.get().auto_logs && - conf.get().old_proxies == conf.get().auto_proxies && + conf.get().old_commit_proxies == conf.get().auto_commit_proxies && conf.get().old_grv_proxies == conf.get().auto_grv_proxies && conf.get().old_resolvers == conf.get().auto_resolvers && conf.get().old_processes_with_transaction == conf.get().auto_processes_with_transaction && conf.get().old_machines_with_transaction == conf.get().auto_machines_with_transaction; bool noDesiredChanges = noChanges && conf.get().old_logs == conf.get().desired_logs && - conf.get().old_proxies == conf.get().desired_proxies && + conf.get().old_commit_proxies == conf.get().desired_commit_proxies && conf.get().old_grv_proxies == conf.get().desired_grv_proxies && conf.get().old_resolvers == conf.get().desired_resolvers; @@ -1816,8 +1821,11 @@ ACTOR Future configure( Database db, std::vector tokens, Refere outputString += format("| replication | %16s | %16s |\n", conf.get().old_replication.c_str(), conf.get().auto_replication.c_str()); outputString += format("| logs | %16d | %16d |", conf.get().old_logs, conf.get().auto_logs); outputString += conf.get().auto_logs != conf.get().desired_logs ? format(" (manually set; would be %d)\n", conf.get().desired_logs) : "\n"; - outputString += format("| proxies | %16d | %16d |", conf.get().old_proxies, conf.get().auto_proxies); - outputString += conf.get().auto_proxies != conf.get().desired_proxies ? format(" (manually set; would be %d)\n", conf.get().desired_proxies) : "\n"; + outputString += format("| commit_proxies | %16d | %16d |", conf.get().old_commit_proxies, + conf.get().auto_commit_proxies); + outputString += conf.get().auto_commit_proxies != conf.get().desired_commit_proxies + ? 
format(" (manually set; would be %d)\n", conf.get().desired_commit_proxies) + : "\n"; outputString += format("| grv_proxies | %16d | %16d |", conf.get().old_grv_proxies, conf.get().auto_grv_proxies); outputString += conf.get().auto_grv_proxies != conf.get().desired_grv_proxies @@ -2531,11 +2539,24 @@ void onOffGenerator(const char* text, const char *line, std::vector } void configureGenerator(const char* text, const char *line, std::vector& lc) { - const char* opts[] = { - "new", "single", "double", "triple", "three_data_hall", "three_datacenter", "ssd", - "ssd-1", "ssd-2", "memory", "memory-1", "memory-2", "memory-radixtree-beta", "proxies=", - "grv_proxies=", "logs=", "resolvers=", nullptr - }; + const char* opts[] = { "new", + "single", + "double", + "triple", + "three_data_hall", + "three_datacenter", + "ssd", + "ssd-1", + "ssd-2", + "memory", + "memory-1", + "memory-2", + "memory-radixtree-beta", + "commit_proxies=", + "grv_proxies=", + "logs=", + "resolvers=", + nullptr }; arrayGenerator(text, line, opts, lc); } diff --git a/fdbclient/CMakeLists.txt b/fdbclient/CMakeLists.txt index 43f9343b28..3f7333b632 100644 --- a/fdbclient/CMakeLists.txt +++ b/fdbclient/CMakeLists.txt @@ -33,7 +33,7 @@ set(FDBCLIENT_SRCS Knobs.h ManagementAPI.actor.cpp ManagementAPI.actor.h - MasterProxyInterface.h + CommitProxyInterface.h MetricLogger.actor.cpp MetricLogger.h MonitorLeader.actor.cpp diff --git a/fdbclient/ClientWorkerInterface.h b/fdbclient/ClientWorkerInterface.h index 4b4f822fc9..c4bdb2bc1b 100644 --- a/fdbclient/ClientWorkerInterface.h +++ b/fdbclient/ClientWorkerInterface.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbrpc/FailureMonitor.h" #include "fdbclient/Status.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" // Streams from WorkerInterface that are safe and useful to call from a client. // A ClientWorkerInterface is embedded as the first element of a WorkerInterface. 
diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index c957ae8633..2570666b12 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbrpc/FailureMonitor.h" #include "fdbclient/Status.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/ClientWorkerInterface.h" struct ClusterInterface { diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/CommitProxyInterface.h similarity index 96% rename from fdbclient/MasterProxyInterface.h rename to fdbclient/CommitProxyInterface.h index 9e2b49037c..c6b12dd7f2 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/CommitProxyInterface.h @@ -1,6 +1,6 @@ /* - * MasterProxyInterface.h + * CommitProxyInterface.h * * This source file is part of the FoundationDB open source project * @@ -19,8 +19,8 @@ * limitations under the License. */ -#ifndef FDBCLIENT_MASTERPROXYINTERFACE_H -#define FDBCLIENT_MASTERPROXYINTERFACE_H +#ifndef FDBCLIENT_COMMITPROXYINTERFACE_H +#define FDBCLIENT_COMMITPROXYINTERFACE_H #pragma once #include @@ -36,7 +36,7 @@ #include "fdbrpc/TimedRequest.h" #include "GrvProxyInterface.h" -struct MasterProxyInterface { +struct CommitProxyInterface { constexpr static FileIdentifier file_identifier = 8954922; enum { LocationAwareLoadBalance = 1 }; enum { AlwaysFresh = 1 }; @@ -59,8 +59,8 @@ struct MasterProxyInterface { UID id() const { return commit.getEndpoint().token; } std::string toString() const { return id().shortString(); } - bool operator == (MasterProxyInterface const& r) const { return id() == r.id(); } - bool operator != (MasterProxyInterface const& r) const { return id() != r.id(); } + bool operator==(CommitProxyInterface const& r) const { return id() == r.id(); } + bool operator!=(CommitProxyInterface const& r) const { return id() != r.id(); } NetworkAddress address() const { return commit.getEndpoint().getPrimaryAddress(); } 
template @@ -101,8 +101,9 @@ struct ClientDBInfo { constexpr static FileIdentifier file_identifier = 5355080; UID id; // Changes each time anything else changes vector< GrvProxyInterface > grvProxies; - vector< MasterProxyInterface > masterProxies; - Optional firstProxy; //not serialized, used for commitOnFirstProxy when the proxies vector has been shrunk + vector commitProxies; + Optional + firstCommitProxy; // not serialized, used for commitOnFirstProxy when the proxies vector has been shrunk double clientTxnInfoSampleRate; int64_t clientTxnInfoSizeLimit; Optional forward; @@ -122,7 +123,7 @@ struct ClientDBInfo { if constexpr (!is_fb_function) { ASSERT(ar.protocolVersion().isValid()); } - serializer(ar, grvProxies, masterProxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit, forward, + serializer(ar, grvProxies, commitProxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit, forward, transactionTagSampleRate, transactionTagSampleCost); } }; diff --git a/fdbclient/CoordinationInterface.h b/fdbclient/CoordinationInterface.h index 0dc2970ca1..95423bf6ca 100644 --- a/fdbclient/CoordinationInterface.h +++ b/fdbclient/CoordinationInterface.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbrpc/fdbrpc.h" #include "fdbrpc/Locality.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/ClusterInterface.h" const int MAX_CLUSTER_FILE_BYTES = 60000; diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index b1580205a0..f70fc4275c 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -29,12 +29,12 @@ DatabaseConfiguration::DatabaseConfiguration() void DatabaseConfiguration::resetInternal() { // does NOT reset rawConfiguration initialized = false; - proxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = + commitProxyCount = grvProxyCount = resolverCount = 
desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = desiredLogRouterCount = -1; tLogVersion = TLogVersion::DEFAULT; tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END; tLogSpillType = TLogSpillType::DEFAULT; - autoProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES; + autoCommitProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; autoGrvProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES; autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS; autoDesiredTLogCount = CLIENT_KNOBS->DEFAULT_AUTO_LOGS; @@ -164,38 +164,21 @@ void DatabaseConfiguration::setDefaultReplicationPolicy() { } bool DatabaseConfiguration::isValid() const { - if( !(initialized && - tLogWriteAntiQuorum >= 0 && - tLogWriteAntiQuorum <= tLogReplicationFactor/2 && - tLogReplicationFactor >= 1 && - storageTeamSize >= 1 && - getDesiredProxies() >= 1 && - getDesiredGrvProxies() >= 1 && - getDesiredLogs() >= 1 && - getDesiredResolvers() >= 1 && - tLogVersion != TLogVersion::UNSET && - tLogVersion >= TLogVersion::MIN_RECRUITABLE && - tLogVersion <= TLogVersion::MAX_SUPPORTED && - tLogDataStoreType != KeyValueStoreType::END && - tLogSpillType != TLogSpillType::UNSET && - !(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) && - storageServerStoreType != KeyValueStoreType::END && - autoProxyCount >= 1 && - autoGrvProxyCount >= 1 && - autoResolverCount >= 1 && - autoDesiredTLogCount >= 1 && - storagePolicy && - tLogPolicy && - getDesiredRemoteLogs() >= 1 && - remoteTLogReplicationFactor >= 0 && - repopulateRegionAntiQuorum >= 0 && - repopulateRegionAntiQuorum <= 1 && - usableRegions >= 1 && - usableRegions <= 2 && - regions.size() <= 2 && - ( usableRegions == 1 || regions.size() == 2 ) && - ( regions.size() == 0 || regions[0].priority >= 0 ) && - ( regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") ) ) { //We cannot specify regions with three_datacenter replication + if (!(initialized && tLogWriteAntiQuorum >= 0 && 
tLogWriteAntiQuorum <= tLogReplicationFactor / 2 && + tLogReplicationFactor >= 1 && storageTeamSize >= 1 && getDesiredCommitProxies() >= 1 && + getDesiredGrvProxies() >= 1 && getDesiredLogs() >= 1 && getDesiredResolvers() >= 1 && + tLogVersion != TLogVersion::UNSET && tLogVersion >= TLogVersion::MIN_RECRUITABLE && + tLogVersion <= TLogVersion::MAX_SUPPORTED && tLogDataStoreType != KeyValueStoreType::END && + tLogSpillType != TLogSpillType::UNSET && + !(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) && + storageServerStoreType != KeyValueStoreType::END && autoCommitProxyCount >= 1 && autoGrvProxyCount >= 1 && + autoResolverCount >= 1 && autoDesiredTLogCount >= 1 && storagePolicy && tLogPolicy && + getDesiredRemoteLogs() >= 1 && remoteTLogReplicationFactor >= 0 && repopulateRegionAntiQuorum >= 0 && + repopulateRegionAntiQuorum <= 1 && usableRegions >= 1 && usableRegions <= 2 && regions.size() <= 2 && + (usableRegions == 1 || regions.size() == 2) && (regions.size() == 0 || regions[0].priority >= 0) && + (regions.size() == 0 || + tLogPolicy->info() != + "dcid^2 x zoneid^2 x 1"))) { // We cannot specify regions with three_datacenter replication return false; } @@ -318,11 +301,11 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if (desiredTLogCount != -1 || isOverridden("logs")) { result["logs"] = desiredTLogCount; } - if (proxyCount != -1 || isOverridden("proxies")) { - result["proxies"] = proxyCount; + if (commitProxyCount != -1 || isOverridden("commit_proxies")) { + result["commit_proxies"] = commitProxyCount; } if (grvProxyCount != -1 || isOverridden("grv_proxies")) { - result["grv_proxies"] = proxyCount; + result["grv_proxies"] = grvProxyCount; } if (resolverCount != -1 || isOverridden("resolvers")) { result["resolvers"] = resolverCount; @@ -336,8 +319,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if (repopulateRegionAntiQuorum != 0 || isOverridden("repopulate_anti_quorum")) {
result["repopulate_anti_quorum"] = repopulateRegionAntiQuorum; } - if (autoProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_PROXIES || isOverridden("auto_proxies")) { - result["auto_proxies"] = autoProxyCount; + if (autoCommitProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES || isOverridden("auto_commit_proxies")) { + result["auto_commit_proxies"] = autoCommitProxyCount; } if (autoGrvProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES || isOverridden("auto_grv_proxies")) { result["auto_grv_proxies"] = autoGrvProxyCount; @@ -419,8 +402,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { if (ck == LiteralStringRef("initialized")) { initialized = true; - } else if (ck == LiteralStringRef("proxies")) { - parse(&proxyCount, value); + } else if (ck == LiteralStringRef("commit_proxies")) { + parse(&commitProxyCount, value); } else if (ck == LiteralStringRef("grv_proxies")) { parse(&grvProxyCount, value); } else if (ck == LiteralStringRef("resolvers")) { @@ -459,8 +442,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { } else if (ck == LiteralStringRef("storage_engine")) { parse((&type), value); storageServerStoreType = (KeyValueStoreType::StoreType)type; - } else if (ck == LiteralStringRef("auto_proxies")) { - parse(&autoProxyCount, value); + } else if (ck == LiteralStringRef("auto_commit_proxies")) { + parse(&autoCommitProxyCount, value); } else if (ck == LiteralStringRef("auto_grv_proxies")) { parse(&autoGrvProxyCount, value); } else if (ck == LiteralStringRef("auto_resolvers")) { diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 4a045200e8..5f3a852ed9 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -149,9 +149,9 @@ struct DatabaseConfiguration { return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1); } - // Proxy Servers - int32_t proxyCount; - int32_t autoProxyCount; + // CommitProxy Servers + int32_t 
commitProxyCount; + int32_t autoCommitProxyCount; int32_t grvProxyCount; int32_t autoGrvProxyCount; @@ -192,7 +192,10 @@ struct DatabaseConfiguration { bool isExcludedServer( NetworkAddressList ) const; std::set getExcludedServers() const; - int32_t getDesiredProxies() const { if(proxyCount == -1) return autoProxyCount; return proxyCount; } + int32_t getDesiredCommitProxies() const { + if (commitProxyCount == -1) return autoCommitProxyCount; + return commitProxyCount; + } int32_t getDesiredGrvProxies() const { if (grvProxyCount == -1) return autoGrvProxyCount; return grvProxyCount; diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 0f86d41f9e..f9367482e5 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -29,7 +29,7 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/KeyRangeMap.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/SpecialKeySpace.actor.h" #include "fdbrpc/QueueModel.h" #include "fdbrpc/MultiInterface.h" @@ -68,7 +68,7 @@ struct LocationInfo : MultiInterface } }; -using ProxyInfo = ModelInterface; +using CommitProxyInfo = ModelInterface; using GrvProxyInfo = ModelInterface; class ClientTagThrottleData : NonCopyable { @@ -165,8 +165,8 @@ public: bool sampleOnCost(uint64_t cost) const; void updateProxies(); - Reference getMasterProxies(bool useProvisionalProxies); - Future> getMasterProxiesFuture(bool useProvisionalProxies); + Reference getCommitProxies(bool useProvisionalProxies); + Future> getCommitProxiesFuture(bool useProvisionalProxies); Reference getGrvProxies(bool useProvisionalProxies); Future onProxiesChanged(); Future getHealthMetrics(bool detailed); @@ -219,7 +219,7 @@ public: Reference>> connectionFile; AsyncTrigger proxiesChangeTrigger; Future monitorProxiesInfoChange; - Reference masterProxies; + Reference commitProxies; Reference grvProxies; bool proxyProvisional; UID proxiesLastChange; diff --git 
a/fdbclient/GrvProxyInterface.h b/fdbclient/GrvProxyInterface.h index 06d4b7e946..94820a175f 100644 --- a/fdbclient/GrvProxyInterface.h +++ b/fdbclient/GrvProxyInterface.h @@ -27,6 +27,8 @@ // with RateKeeper to gather health information of the cluster. struct GrvProxyInterface { constexpr static FileIdentifier file_identifier = 8743216; + enum { LocationAwareLoadBalance = 1 }; + enum { AlwaysFresh = 1 }; Optional processId; bool provisional; diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index c2e99f63fb..d1ec7a4f5f 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -52,7 +52,7 @@ void ClientKnobs::initialize(bool randomize) { init( COORDINATOR_RECONNECTION_DELAY, 1.0 ); init( CLIENT_EXAMPLE_AMOUNT, 20 ); init( MAX_CLIENT_STATUS_AGE, 1.0 ); - init( MAX_MASTER_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_MASTER_PROXY_CONNECTIONS = 1; + init( MAX_COMMIT_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_COMMIT_PROXY_CONNECTIONS = 1; init( MAX_GRV_PROXY_CONNECTIONS, 3 ); if( randomize && BUGGIFY ) MAX_GRV_PROXY_CONNECTIONS = 1; init( STATUS_IDLE_TIMEOUT, 120.0 ); @@ -171,7 +171,7 @@ void ClientKnobs::initialize(bool randomize) { init( MIN_CLEANUP_SECONDS, 3600.0 ); // Configuration - init( DEFAULT_AUTO_PROXIES, 3 ); + init( DEFAULT_AUTO_COMMIT_PROXIES, 3 ); init( DEFAULT_AUTO_GRV_PROXIES, 1 ); init( DEFAULT_AUTO_RESOLVERS, 1 ); init( DEFAULT_AUTO_LOGS, 3 ); diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 30e7e7f687..7edaf18e7d 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -46,7 +46,7 @@ public: double COORDINATOR_RECONNECTION_DELAY; int CLIENT_EXAMPLE_AMOUNT; double MAX_CLIENT_STATUS_AGE; - int MAX_MASTER_PROXY_CONNECTIONS; + int MAX_COMMIT_PROXY_CONNECTIONS; int MAX_GRV_PROXY_CONNECTIONS; double STATUS_IDLE_TIMEOUT; @@ -167,7 +167,7 @@ public: double MIN_CLEANUP_SECONDS; // Configuration - int32_t DEFAULT_AUTO_PROXIES; + int32_t DEFAULT_AUTO_COMMIT_PROXIES; int32_t DEFAULT_AUTO_GRV_PROXIES; int32_t 
DEFAULT_AUTO_RESOLVERS; int32_t DEFAULT_AUTO_LOGS; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index a05fce601e..e4a5183b95 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -78,8 +78,9 @@ std::map configForToken( std::string const& mode ) { std::string key = mode.substr(0, pos); std::string value = mode.substr(pos+1); - if ((key == "logs" || key == "proxies" || key == "grv_proxies" || key == "resolvers" || key == "remote_logs" || - key == "log_routers" || key == "usable_regions" || key == "repopulate_anti_quorum") && + if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" || + key == "remote_logs" || key == "log_routers" || key == "usable_regions" || + key == "repopulate_anti_quorum") && isInteger(value)) { out[p+key] = value; } @@ -656,7 +657,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { } if (processClass.classType() == ProcessClass::TransactionClass || - processClass.classType() == ProcessClass::ProxyClass || + processClass.classType() == ProcessClass::CommitProxyClass || processClass.classType() == ProcessClass::GrvProxyClass || processClass.classType() == ProcessClass::ResolutionClass || processClass.classType() == ProcessClass::StatelessClass || @@ -701,7 +702,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { if (proc.second == ProcessClass::StatelessClass) { existingStatelessCount++; } - if(proc.second == ProcessClass::ProxyClass) { + if (proc.second == ProcessClass::CommitProxyClass) { existingProxyCount++; } if (proc.second == ProcessClass::GrvProxyClass) { @@ -734,16 +735,16 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { resolverCount = result.old_resolvers; } - result.desired_proxies = std::max(std::min(12, processCount / 15), 1); + result.desired_commit_proxies = std::max(std::min(12, processCount / 15), 1); int proxyCount; - if (!statusObjConfig.get("proxies", 
result.old_proxies)) { - result.old_proxies = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES; - statusObjConfig.get("auto_proxies", result.old_proxies); - result.auto_proxies = result.desired_proxies; - proxyCount = result.auto_proxies; + if (!statusObjConfig.get("commit_proxies", result.old_commit_proxies)) { + result.old_commit_proxies = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; + statusObjConfig.get("auto_commit_proxies", result.old_commit_proxies); + result.auto_commit_proxies = result.desired_commit_proxies; + proxyCount = result.auto_commit_proxies; } else { - result.auto_proxies = result.old_proxies; - proxyCount = result.old_proxies; + result.auto_commit_proxies = result.old_commit_proxies; + proxyCount = result.old_commit_proxies; } // Need to configure a good number. @@ -857,8 +858,8 @@ ACTOR Future autoConfig( Database cx, ConfigureAutoRe if (conf.auto_logs != conf.old_logs) tr.set(configKeysPrefix.toString() + "auto_logs", format("%d", conf.auto_logs)); - if(conf.auto_proxies != conf.old_proxies) - tr.set(configKeysPrefix.toString() + "auto_proxies", format("%d", conf.auto_proxies)); + if (conf.auto_commit_proxies != conf.old_commit_proxies) + tr.set(configKeysPrefix.toString() + "auto_commit_proxies", format("%d", conf.auto_commit_proxies)); if (conf.auto_grv_proxies != conf.old_grv_proxies) tr.set(configKeysPrefix.toString() + "auto_grv_proxies", format("%d", conf.auto_grv_proxies)); diff --git a/fdbclient/ManagementAPI.actor.h b/fdbclient/ManagementAPI.actor.h index 20b2a447d9..e87f9aedd2 100644 --- a/fdbclient/ManagementAPI.actor.h +++ b/fdbclient/ManagementAPI.actor.h @@ -86,7 +86,7 @@ struct ConfigureAutoResult { int32_t machines; std::string old_replication; - int32_t old_proxies; + int32_t old_commit_proxies; int32_t old_grv_proxies; int32_t old_resolvers; int32_t old_logs; @@ -94,23 +94,24 @@ struct ConfigureAutoResult { int32_t old_machines_with_transaction; std::string auto_replication; - int32_t auto_proxies; + int32_t auto_commit_proxies; int32_t 
auto_grv_proxies; int32_t auto_resolvers; int32_t auto_logs; int32_t auto_processes_with_transaction; int32_t auto_machines_with_transaction; - int32_t desired_proxies; + int32_t desired_commit_proxies; int32_t desired_grv_proxies; int32_t desired_resolvers; int32_t desired_logs; ConfigureAutoResult() - : processes(-1), machines(-1), old_proxies(-1), old_grv_proxies(-1), old_resolvers(-1), old_logs(-1), - old_processes_with_transaction(-1), old_machines_with_transaction(-1), auto_proxies(-1), auto_grv_proxies(-1), - auto_resolvers(-1), auto_logs(-1), auto_processes_with_transaction(-1), auto_machines_with_transaction(-1), - desired_proxies(-1), desired_grv_proxies(-1), desired_resolvers(-1), desired_logs(-1) {} + : processes(-1), machines(-1), old_commit_proxies(-1), old_grv_proxies(-1), old_resolvers(-1), old_logs(-1), + old_processes_with_transaction(-1), old_machines_with_transaction(-1), auto_commit_proxies(-1), + auto_grv_proxies(-1), auto_resolvers(-1), auto_logs(-1), auto_processes_with_transaction(-1), + auto_machines_with_transaction(-1), desired_commit_proxies(-1), desired_grv_proxies(-1), desired_resolvers(-1), + desired_logs(-1) {} bool isValid() const { return processes != -1; } }; diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 1e13b18560..e3ac757840 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -624,8 +624,8 @@ ACTOR Future getClientInfoFromLeader( Referenceget().get().clientInterface.openDatabase.getReply( req ) ) ) ) { TraceEvent("MonitorLeaderForProxiesGotClientInfo", knownLeader->get().get().clientInterface.id()) - .detail("MasterProxy0", ni.masterProxies.size() ? ni.masterProxies[0].id() : UID()) - .detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].id() : UID()) + .detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].id() : UID()) + .detail("GrvProxy0", ni.grvProxies.size() ? 
ni.grvProxies[0].id() : UID()) .detail("ClientID", ni.id); clientData->clientInfo->set(CachedSerialization(ni)); } @@ -681,24 +681,25 @@ ACTOR Future monitorLeaderForProxies( Key clusterKey, vector& lastMasterProxyUIDs, std::vector& lastMasterProxies, - std::vector& lastGrvProxyUIDs, std::vector& lastGrvProxies) { - if(ni.masterProxies.size() > CLIENT_KNOBS->MAX_MASTER_PROXY_CONNECTIONS) { - std::vector masterProxyUIDs; - for(auto& masterProxy : ni.masterProxies) { - masterProxyUIDs.push_back(masterProxy.id()); +void shrinkProxyList(ClientDBInfo& ni, std::vector& lastCommitProxyUIDs, + std::vector& lastCommitProxies, std::vector& lastGrvProxyUIDs, + std::vector& lastGrvProxies) { + if (ni.commitProxies.size() > CLIENT_KNOBS->MAX_COMMIT_PROXY_CONNECTIONS) { + std::vector commitProxyUIDs; + for (auto& commitProxy : ni.commitProxies) { + commitProxyUIDs.push_back(commitProxy.id()); } - if(masterProxyUIDs != lastMasterProxyUIDs) { - lastMasterProxyUIDs.swap(masterProxyUIDs); - lastMasterProxies = ni.masterProxies; - deterministicRandom()->randomShuffle(lastMasterProxies); - lastMasterProxies.resize(CLIENT_KNOBS->MAX_MASTER_PROXY_CONNECTIONS); - for(int i = 0; i < lastMasterProxies.size(); i++) { - TraceEvent("ConnectedMasterProxy").detail("MasterProxy", lastMasterProxies[i].id()); + if (commitProxyUIDs != lastCommitProxyUIDs) { + lastCommitProxyUIDs.swap(commitProxyUIDs); + lastCommitProxies = ni.commitProxies; + deterministicRandom()->randomShuffle(lastCommitProxies); + lastCommitProxies.resize(CLIENT_KNOBS->MAX_COMMIT_PROXY_CONNECTIONS); + for (int i = 0; i < lastCommitProxies.size(); i++) { + TraceEvent("ConnectedCommitProxy").detail("CommitProxy", lastCommitProxies[i].id()); } } - ni.firstProxy = ni.masterProxies[0]; - ni.masterProxies = lastMasterProxies; + ni.firstCommitProxy = ni.commitProxies[0]; + ni.commitProxies = lastCommitProxies; } if(ni.grvProxies.size() > CLIENT_KNOBS->MAX_GRV_PROXY_CONNECTIONS) { std::vector grvProxyUIDs; @@ -719,14 +720,16 @@ void 
shrinkProxyList( ClientDBInfo& ni, std::vector& lastMasterProxyUIDs, s } // Leader is the process that will be elected by coordinators as the cluster controller -ACTOR Future monitorProxiesOneGeneration( Reference connFile, Reference> clientInfo, MonitorLeaderInfo info, Reference>>> supportedVersions, Key traceLogGroup) { +ACTOR Future monitorProxiesOneGeneration( + Reference connFile, Reference> clientInfo, MonitorLeaderInfo info, + Reference>>> supportedVersions, Key traceLogGroup) { state ClusterConnectionString cs = info.intermediateConnFile->getConnectionString(); state vector addrs = cs.coordinators(); state int idx = 0; state int successIdx = 0; state Optional incorrectTime; - state std::vector lastProxyUIDs; - state std::vector lastProxies; + state std::vector lastCommitProxyUIDs; + state std::vector lastCommitProxies; state std::vector lastGrvProxyUIDs; state std::vector lastGrvProxies; @@ -780,7 +783,7 @@ ACTOR Future monitorProxiesOneGeneration( ReferencenotifyConnected(); auto& ni = rep.get().mutate(); - shrinkProxyList(ni, lastProxyUIDs, lastProxies, lastGrvProxyUIDs, lastGrvProxies); + shrinkProxyList(ni, lastCommitProxyUIDs, lastCommitProxies, lastGrvProxyUIDs, lastGrvProxies); clientInfo->set( ni ); successIdx = idx; } else { diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index 58f1fd3bbd..643cf361c7 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/ClusterInterface.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #define CLUSTER_FILE_ENV_VAR_NAME "FDB_CLUSTER_FILE" @@ -67,8 +67,9 @@ Future monitorLeaderForProxies( Value const& key, vector c Future monitorProxies( Reference>> const& connFile, Reference> const& clientInfo, Reference>>> const& supportedVersions, Key const& traceLogGroup ); -void shrinkProxyList( ClientDBInfo& ni, std::vector& 
lastMasterProxyUIDs, std::vector& lastMasterProxies, - std::vector& lastGrvProxyUIDs, std::vector& lastGrvProxies); +void shrinkProxyList(ClientDBInfo& ni, std::vector& lastCommitProxyUIDs, + std::vector& lastCommitProxies, std::vector& lastGrvProxyUIDs, + std::vector& lastGrvProxies); #ifndef __INTEL_COMPILER #pragma region Implementation diff --git a/fdbclient/MutationList.h b/fdbclient/MutationList.h index bcc9b0db76..57aba3614c 100644 --- a/fdbclient/MutationList.h +++ b/fdbclient/MutationList.h @@ -151,7 +151,8 @@ public: } } - //FIXME: this is re-implemented on the master proxy to include a yield, any changes to this function should also done there + // FIXME: this is re-implemented on the commit proxy to include a yield, any changes to this function should also + // done there template void serialize_save( Ar& ar ) const { serializer(ar, totalBytes); diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index fba6fdf6f8..19683a536b 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -40,7 +40,7 @@ #include "fdbclient/KeyRangeMap.h" #include "fdbclient/Knobs.h" #include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/MonitorLeader.h" #include "fdbclient/MutationList.h" #include "fdbclient/ReadYourWrites.h" @@ -484,15 +484,15 @@ ACTOR static Future clientStatusUpdateActor(DatabaseContext *cx) { } ACTOR static Future monitorProxiesChange(Reference> clientDBInfo, AsyncTrigger *triggerVar) { - state vector< MasterProxyInterface > curProxies; + state vector curCommitProxies; state vector< GrvProxyInterface > curGrvProxies; - curProxies = clientDBInfo->get().masterProxies; + curCommitProxies = clientDBInfo->get().commitProxies; curGrvProxies = clientDBInfo->get().grvProxies; loop{ wait(clientDBInfo->onChange()); - if (clientDBInfo->get().masterProxies != curProxies || clientDBInfo->get().grvProxies != curGrvProxies) { - 
curProxies = clientDBInfo->get().masterProxies; + if (clientDBInfo->get().commitProxies != curCommitProxies || clientDBInfo->get().grvProxies != curGrvProxies) { + curCommitProxies = clientDBInfo->get().commitProxies; curGrvProxies = clientDBInfo->get().grvProxies; triggerVar->trigger(); } @@ -881,7 +881,7 @@ DatabaseContext::DatabaseContext(Reference(specialKeys.begin, specialKeys.end, /* test */ false)) { dbId = deterministicRandom()->randomUniqueID(); - connected = (clientInfo->get().masterProxies.size() && clientInfo->get().grvProxies.size()) + connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) ? Void() : clientInfo->onChange(); @@ -1164,9 +1164,9 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional(value.get()) : Optional>(), clientLocality.machineId(), clientLocality.dcId() ); - if( clientInfo->get().masterProxies.size() ) - masterProxies = Reference( new ProxyInfo( clientInfo->get().masterProxies) ); - if( clientInfo->get().grvProxies.size() ) + if (clientInfo->get().commitProxies.size()) + commitProxies = Reference(new CommitProxyInfo(clientInfo->get().commitProxies)); + if( clientInfo->get().grvProxies.size() ) grvProxies = Reference( new GrvProxyInfo( clientInfo->get().grvProxies ) ); server_interf.clear(); locationCache.insert( allKeys, Reference() ); @@ -1176,9 +1176,9 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional(value.get()) : Optional>()); - if( clientInfo->get().masterProxies.size() ) - masterProxies = Reference( new ProxyInfo( clientInfo->get().masterProxies)); - if( clientInfo->get().grvProxies.size() ) + if (clientInfo->get().commitProxies.size()) + commitProxies = Reference(new CommitProxyInfo(clientInfo->get().commitProxies)); + if( clientInfo->get().grvProxies.size() ) grvProxies = Reference( new GrvProxyInfo( clientInfo->get().grvProxies )); server_interf.clear(); locationCache.insert( allKeys, Reference() ); @@ -1220,13 +1220,13 @@ ACTOR 
static Future switchConnectionFileImpl(ReferencegetConnectionString().toString()); // Reset state from former cluster. - self->masterProxies.clear(); + self->commitProxies.clear(); self->grvProxies.clear(); self->minAcceptableReadVersion = std::numeric_limits::max(); self->invalidateCache(allKeys); auto clearedClientInfo = self->clientInfo->get(); - clearedClientInfo.masterProxies.clear(); + clearedClientInfo.commitProxies.clear(); clearedClientInfo.grvProxies.clear(); clearedClientInfo.id = deterministicRandom()->randomUniqueID(); self->clientInfo->set(clearedClientInfo); @@ -1561,29 +1561,29 @@ void stopNetwork() { void DatabaseContext::updateProxies() { if (proxiesLastChange == clientInfo->get().id) return; proxiesLastChange = clientInfo->get().id; - masterProxies.clear(); + commitProxies.clear(); grvProxies.clear(); - bool masterProxyProvisional = false, grvProxyProvisional = false; - if (clientInfo->get().masterProxies.size()) { - masterProxies = Reference(new ProxyInfo(clientInfo->get().masterProxies)); - masterProxyProvisional = clientInfo->get().masterProxies[0].provisional; + bool commitProxyProvisional = false, grvProxyProvisional = false; + if (clientInfo->get().commitProxies.size()) { + commitProxies = Reference(new CommitProxyInfo(clientInfo->get().commitProxies)); + commitProxyProvisional = clientInfo->get().commitProxies[0].provisional; } if (clientInfo->get().grvProxies.size()) { grvProxies = Reference(new GrvProxyInfo(clientInfo->get().grvProxies)); grvProxyProvisional = clientInfo->get().grvProxies[0].provisional; } - if (clientInfo->get().masterProxies.size() && clientInfo->get().grvProxies.size()) { - ASSERT(masterProxyProvisional == grvProxyProvisional); - proxyProvisional = masterProxyProvisional; + if (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) { + ASSERT(commitProxyProvisional == grvProxyProvisional); + proxyProvisional = commitProxyProvisional; } } -Reference DatabaseContext::getMasterProxies(bool 
useProvisionalProxies) { +Reference DatabaseContext::getCommitProxies(bool useProvisionalProxies) { updateProxies(); if (proxyProvisional && !useProvisionalProxies) { - return Reference(); + return Reference(); } - return masterProxies; + return commitProxies; } Reference DatabaseContext::getGrvProxies(bool useProvisionalProxies) { @@ -1594,19 +1594,19 @@ Reference DatabaseContext::getGrvProxies(bool useProvisionalProxie return grvProxies; } -//Actor which will wait until the MultiInterface returned by the DatabaseContext cx is not NULL -ACTOR Future> getMasterProxiesFuture(DatabaseContext *cx, bool useProvisionalProxies) { +// Actor which will wait until the MultiInterface returned by the DatabaseContext cx is not NULL +ACTOR Future> getCommitProxiesFuture(DatabaseContext* cx, bool useProvisionalProxies) { loop{ - Reference proxies = cx->getMasterProxies(useProvisionalProxies); + Reference proxies = cx->getCommitProxies(useProvisionalProxies); if (proxies) return proxies; wait( cx->onProxiesChanged() ); } } -//Returns a future which will not be set until the ProxyInfo of this DatabaseContext is not NULL -Future> DatabaseContext::getMasterProxiesFuture(bool useProvisionalProxies) { - return ::getMasterProxiesFuture(this, useProvisionalProxies); +// Returns a future which will not be set until the CommitProxyInfo of this DatabaseContext is not NULL +Future> DatabaseContext::getCommitProxiesFuture(bool useProvisionalProxies) { + return ::getCommitProxiesFuture(this, useProvisionalProxies); } void GetRangeLimits::decrement( VectorRef const& data ) { @@ -1733,8 +1733,8 @@ ACTOR Future>> getKeyLocation_internal(Da ++cx->transactionKeyServerLocationRequests; choose { when (wait(cx->onProxiesChanged())) {} - when (GetKeyServerLocationsReply rep = wait(basicLoadBalance( - cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, + when(GetKeyServerLocationsReply rep = wait(basicLoadBalance( + 
cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(span.context, key, Optional(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint))) { ++cx->transactionKeyServerLocationRequestsCompleted; @@ -1782,8 +1782,8 @@ ACTOR Future>>> getKeyRangeLocatio ++cx->transactionKeyServerLocationRequests; choose { when ( wait( cx->onProxiesChanged() ) ) {} - when ( GetKeyServerLocationsReply _rep = wait(basicLoadBalance( - cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, + when(GetKeyServerLocationsReply _rep = wait(basicLoadBalance( + cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(span.context, keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint))) { ++cx->transactionKeyServerLocationRequestsCompleted; @@ -3450,14 +3450,16 @@ ACTOR static Future tryCommit( Database cx, Reference req.debugID = commitID; state Future reply; if (options.commitOnFirstProxy) { - if(cx->clientInfo->get().firstProxy.present()) { - reply = throwErrorOr ( brokenPromiseToMaybeDelivered ( cx->clientInfo->get().firstProxy.get().commit.tryGetReply(req) ) ); + if (cx->clientInfo->get().firstCommitProxy.present()) { + reply = throwErrorOr(brokenPromiseToMaybeDelivered( + cx->clientInfo->get().firstCommitProxy.get().commit.tryGetReply(req))); } else { - const std::vector& proxies = cx->clientInfo->get().masterProxies; + const std::vector& proxies = cx->clientInfo->get().commitProxies; reply = proxies.size() ? 
throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never(); } } else { - reply = basicLoadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true ); + reply = basicLoadBalance(cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::commit, + req, TaskPriority::DefaultPromiseEndpoint, true); } choose { @@ -3531,8 +3533,9 @@ ACTOR static Future tryCommit( Database cx, Reference // We don't know if the commit happened, and it might even still be in flight. if (!options.causalWriteRisky) { - // Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the version we submitted with is dead, or by committing a conflicting transaction successfully - //if ( cx->getMasterProxies()->masterGeneration <= originalMasterGeneration ) + // Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the + // version we submitted with is dead, or by committing a conflicting transaction successfully + // if ( cx->getCommitProxies()->masterGeneration <= originalMasterGeneration ) // To ensure the original request is not in flight, we need a key range which intersects its read conflict ranges // We pick a key range which also intersects its write conflict ranges, since that avoids potentially creating conflicts where there otherwise would be none @@ -4433,8 +4436,8 @@ ACTOR Future>> waitDataDistributionMetricsLis choose { when(wait(cx->onProxiesChanged())) {} when(ErrorOr rep = - wait(errorOr(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::getDDMetrics, - GetDDMetricsRequest(keys, shardLimit))))) { + wait(errorOr(basicLoadBalance(cx->getCommitProxies(false), &CommitProxyInterface::getDDMetrics, + GetDDMetricsRequest(keys, shardLimit))))) { if (rep.isError()) { throw rep.getError(); } @@ -4539,7 +4542,9 @@ ACTOR Future snapCreate(Database cx, Standalone snapCmd, 
UID sn loop { choose { when(wait(cx->onProxiesChanged())) {} - when(wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID, true /*atmostOnce*/ ))) { + when(wait(basicLoadBalance(cx->getCommitProxies(false), &CommitProxyInterface::proxySnapReq, + ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID, + true /*atmostOnce*/))) { TraceEvent("SnapCreateExit") .detail("SnapCmd", snapCmd.toString()) .detail("UID", snapUID); @@ -4567,8 +4572,8 @@ ACTOR Future checkSafeExclusions(Database cx, vector exc choose { when(wait(cx->onProxiesChanged())) {} when(ExclusionSafetyCheckReply _ddCheck = - wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::exclusionSafetyCheckReq, - req, cx->taskID))) { + wait(basicLoadBalance(cx->getCommitProxies(false), + &CommitProxyInterface::exclusionSafetyCheckReq, req, cx->taskID))) { ddCheck = _ddCheck.safe; break; } diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index 2d35022a4a..35338b3c93 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -30,7 +30,7 @@ #include "flow/flow.h" #include "flow/TDMetric.actor.h" #include "fdbclient/FDBTypes.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/FDBOptions.g.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/ClusterInterface.h" diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 333887d1f3..6c20adc96e 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -47,7 +47,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "storage", "transaction", "resolution", - "proxy", + "commit_proxy", "grv_proxy", "master", "test", @@ -84,7 +84,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "role":{ "$enum":[ "master", - "proxy", + "commit_proxy", "grv_proxy", "log", "storage", @@ -486,7 +486,7 @@ 
const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( R"statusSchema( "recovery_state":{ "required_resolvers":1, - "required_proxies":1, + "required_commit_proxies":1, "required_grv_proxies":1, "name":{ "$enum":[ @@ -675,11 +675,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "address":"10.0.4.1" } ], - "auto_proxies":3, + "auto_commit_proxies":3, "auto_grv_proxies":1, "auto_resolvers":1, "auto_logs":3, - "proxies":5, + "commit_proxies":5, "grv_proxies":1, "backup_worker_enabled":1 }, @@ -879,11 +879,11 @@ const KeyRef JSONSchemas::clusterConfigurationSchema = LiteralStringRef(R"config "ssd-2", "memory" ]}, - "auto_proxies":3, + "auto_commit_proxies":3, "auto_grv_proxies":1, "auto_resolvers":1, "auto_logs":3, - "proxies":5 + "commit_proxies":5 "grv_proxies":1 })configSchema"); diff --git a/fdbclient/TagThrottle.actor.cpp b/fdbclient/TagThrottle.actor.cpp index a566b2fbfa..ebf0157d1c 100644 --- a/fdbclient/TagThrottle.actor.cpp +++ b/fdbclient/TagThrottle.actor.cpp @@ -19,7 +19,7 @@ */ #include "fdbclient/TagThrottle.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/DatabaseContext.h" #include "flow/actorcompiler.h" // has to be last include diff --git a/fdbrpc/Locality.cpp b/fdbrpc/Locality.cpp index 5409abdedf..1a1f64708d 100644 --- a/fdbrpc/Locality.cpp +++ b/fdbrpc/Locality.cpp @@ -63,9 +63,9 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons default: return ProcessClass::NeverAssign; } - case ProcessClass::Proxy: + case ProcessClass::CommitProxy: switch( _class ) { - case ProcessClass::ProxyClass: + case ProcessClass::CommitProxyClass: return ProcessClass::BestFit; case ProcessClass::StatelessClass: return ProcessClass::GoodFit; @@ -92,7 +92,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons return ProcessClass::GoodFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; 
- case ProcessClass::ProxyClass: + case ProcessClass::CommitProxyClass: return ProcessClass::OkayFit; case ProcessClass::ResolutionClass: return ProcessClass::OkayFit; @@ -192,7 +192,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons return ProcessClass::OkayFit; case ProcessClass::TransactionClass: return ProcessClass::OkayFit; - case ProcessClass::ProxyClass: + case ProcessClass::CommitProxyClass: return ProcessClass::OkayFit; case ProcessClass::GrvProxyClass: return ProcessClass::OkayFit; diff --git a/fdbrpc/Locality.h b/fdbrpc/Locality.h index da89dfc3cb..8f9be25818 100644 --- a/fdbrpc/Locality.h +++ b/fdbrpc/Locality.h @@ -33,7 +33,7 @@ struct ProcessClass { TransactionClass, ResolutionClass, TesterClass, - ProxyClass, // Process class of CommitProxy + CommitProxyClass, GrvProxyClass, MasterClass, StatelessClass, @@ -53,7 +53,7 @@ struct ProcessClass { enum ClusterRole { Storage, TLog, - Proxy, + CommitProxy, GrvProxy, Master, Resolver, @@ -77,7 +77,8 @@ public: if (s=="storage") _class = StorageClass; else if (s=="transaction") _class = TransactionClass; else if (s=="resolution") _class = ResolutionClass; - else if (s=="proxy") _class = ProxyClass; +// else if (s=="proxy") _class = CommitProxyClass; + else if (s=="commit_proxy") _class = CommitProxyClass; else if (s=="grv_proxy") _class = GrvProxyClass; else if (s=="master") _class = MasterClass; else if (s=="test") _class = TesterClass; @@ -99,7 +100,8 @@ public: if (classStr=="storage") _class = StorageClass; else if (classStr=="transaction") _class = TransactionClass; else if (classStr=="resolution") _class = ResolutionClass; - else if (classStr=="proxy") _class = ProxyClass; +// else if (classStr=="proxy") _class = CommitProxyClass; + else if (classStr=="commit_proxy") _class = CommitProxyClass; else if (classStr=="grv_proxy") _class = GrvProxyClass; else if (classStr=="master") _class = MasterClass; else if (classStr=="test") _class = TesterClass; @@ -137,7 +139,7 @@ 
public: case StorageClass: return "storage"; case TransactionClass: return "transaction"; case ResolutionClass: return "resolution"; - case ProxyClass: return "proxy"; + case CommitProxyClass: return "commit_proxy"; case GrvProxyClass: return "grv_proxy"; case MasterClass: return "master"; case TesterClass: return "test"; @@ -342,6 +344,7 @@ struct LBLocalityData { template struct LBLocalityData::type> { enum { Present = 1 }; + // TODO: figure out why some interfaces don't have locality. static LocalityData getLocality( Interface const& i ) { return i.locality; } static NetworkAddress getAddress( Interface const& i ) { return i.address(); } static bool alwaysFresh() { return Interface::AlwaysFresh; } diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index 8f01cad30a..e27f12a744 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -97,7 +97,8 @@ public: case ProcessClass::StorageClass: return true; case ProcessClass::TransactionClass: return true; case ProcessClass::ResolutionClass: return false; - case ProcessClass::ProxyClass: return false; + case ProcessClass::CommitProxyClass: + return false; case ProcessClass::GrvProxyClass: return false; case ProcessClass::MasterClass: diff --git a/fdbserver/BackupWorker.actor.cpp b/fdbserver/BackupWorker.actor.cpp index 5860a6772a..2f7ff8fbca 100644 --- a/fdbserver/BackupWorker.actor.cpp +++ b/fdbserver/BackupWorker.actor.cpp @@ -21,7 +21,7 @@ #include "fdbclient/BackupAgent.actor.h" #include "fdbclient/BackupContainer.h" #include "fdbclient/DatabaseContext.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/SystemData.h" #include "fdbserver/BackupInterface.h" #include "fdbserver/BackupProgress.actor.h" diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 3404df2547..823150dfe9 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -46,7 +46,7 @@ set(FDBSERVER_SRCS LogSystemDiskQueueAdapter.h LogSystemPeekCursor.actor.cpp 
MasterInterface.h - MasterProxyServer.actor.cpp + CommitProxyServer.actor.cpp masterserver.actor.cpp MutationTracking.h MutationTracking.cpp diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index c51351b96e..5c4b1c8215 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -753,20 +753,21 @@ public: } } - auto first_proxy = getWorkerForRoleInDatacenter(dcId, ProcessClass::Proxy, ProcessClass::ExcludeFit, - req.configuration, id_used); + auto first_commit_proxy = getWorkerForRoleInDatacenter(dcId, ProcessClass::CommitProxy, + ProcessClass::ExcludeFit, req.configuration, id_used); auto first_grv_proxy = getWorkerForRoleInDatacenter(dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, req.configuration, id_used); auto first_resolver = getWorkerForRoleInDatacenter(dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, req.configuration, id_used); - auto proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::Proxy, req.configuration.getDesiredProxies(), - req.configuration, id_used, first_proxy); + auto commit_proxies = + getWorkersForRoleInDatacenter(dcId, ProcessClass::CommitProxy, req.configuration.getDesiredCommitProxies(), + req.configuration, id_used, first_commit_proxy); auto grv_proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::GrvProxy, req.configuration.getDesiredGrvProxies(), req.configuration, id_used, first_grv_proxy); auto resolvers = getWorkersForRoleInDatacenter( dcId, ProcessClass::Resolver, req.configuration.getDesiredResolvers(), req.configuration, id_used, first_resolver ); - for (int i = 0; i < proxies.size(); i++) result.masterProxies.push_back(proxies[i].interf); + for (int i = 0; i < commit_proxies.size(); i++) result.commitProxies.push_back(commit_proxies[i].interf); for (int i = 0; i < grv_proxies.size(); i++) result.grvProxies.push_back(grv_proxies[i].interf); for(int i = 0; i < resolvers.size(); i++) 
result.resolvers.push_back(resolvers[i].interf); @@ -800,9 +801,9 @@ public: RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId), ProcessClass::TLog) .betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog))) || - RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), - ProcessClass::Proxy) - .betterCount(RoleFitness(proxies, ProcessClass::Proxy)) || + RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredCommitProxies(), + ProcessClass::CommitProxy) + .betterCount(RoleFitness(commit_proxies, ProcessClass::CommitProxy)) || RoleFitness(SERVER_KNOBS->EXPECTED_GRV_PROXY_FITNESS, req.configuration.getDesiredGrvProxies(), ProcessClass::GrvProxy) .betterCount(RoleFitness(grv_proxies, ProcessClass::GrvProxy)) || @@ -911,22 +912,22 @@ public: try { //SOMEDAY: recruitment in other DCs besides the clusterControllerDcID will not account for the processes used by the master and cluster controller properly. 
auto used = id_used; - auto first_proxy = getWorkerForRoleInDatacenter(dcId, ProcessClass::Proxy, ProcessClass::ExcludeFit, - req.configuration, used); + auto first_commit_proxy = getWorkerForRoleInDatacenter( + dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, req.configuration, used); auto first_grv_proxy = getWorkerForRoleInDatacenter( dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, req.configuration, used); auto first_resolver = getWorkerForRoleInDatacenter( dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, req.configuration, used); - auto proxies = - getWorkersForRoleInDatacenter(dcId, ProcessClass::Proxy, req.configuration.getDesiredProxies(), - req.configuration, used, first_proxy); + auto commit_proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::CommitProxy, + req.configuration.getDesiredCommitProxies(), + req.configuration, used, first_commit_proxy); auto grv_proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::GrvProxy, req.configuration.getDesiredGrvProxies(), req.configuration, used, first_grv_proxy); auto resolvers = getWorkersForRoleInDatacenter( dcId, ProcessClass::Resolver, req.configuration.getDesiredResolvers(), req.configuration, used, first_resolver ); - RoleFitnessPair fitness(RoleFitness(proxies, ProcessClass::Proxy), + RoleFitnessPair fitness(RoleFitness(commit_proxies, ProcessClass::CommitProxy), RoleFitness(grv_proxies, ProcessClass::GrvProxy), RoleFitness(resolvers, ProcessClass::Resolver)); @@ -936,8 +937,8 @@ public: for (int i = 0; i < resolvers.size(); i++) { result.resolvers.push_back(resolvers[i].interf); } - for (int i = 0; i < proxies.size(); i++) { - result.masterProxies.push_back(proxies[i].interf); + for (int i = 0; i < commit_proxies.size(); i++) { + result.commitProxies.push_back(commit_proxies[i].interf); } for (int i = 0; i < grv_proxies.size(); i++) { result.grvProxies.push_back(grv_proxies[i].interf); @@ -982,8 +983,8 @@ public: .detail("Replication", 
req.configuration.tLogReplicationFactor) .detail("DesiredLogs", req.configuration.getDesiredLogs()) .detail("ActualLogs", result.tLogs.size()) - .detail("DesiredProxies", req.configuration.getDesiredProxies()) - .detail("ActualProxies", result.masterProxies.size()) + .detail("DesiredCommitProxies", req.configuration.getDesiredCommitProxies()) + .detail("ActualCommitProxies", result.commitProxies.size()) .detail("DesiredGrvProxies", req.configuration.getDesiredGrvProxies()) .detail("ActualGrvProxies", result.grvProxies.size()) .detail("DesiredResolvers", req.configuration.getDesiredResolvers()) @@ -993,8 +994,8 @@ public: (RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog) .betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || - RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), - ProcessClass::Proxy) + RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredCommitProxies(), + ProcessClass::CommitProxy) .betterCount(bestFitness.proxy) || RoleFitness(SERVER_KNOBS->EXPECTED_GRV_PROXY_FITNESS, req.configuration.getDesiredGrvProxies(), ProcessClass::GrvProxy) @@ -1028,7 +1029,8 @@ public: } getWorkerForRoleInDatacenter( regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true ); - getWorkerForRoleInDatacenter( regions[0].dcId, ProcessClass::Proxy, ProcessClass::ExcludeFit, db.config, id_used, true ); + getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, + db.config, id_used, true); getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, db.config, id_used, true); @@ -1129,15 +1131,13 @@ public: } } - // Get proxy classes - std::vector proxyClasses; - for(auto& it : dbi.client.masterProxies) { - auto masterProxyWorker = id_worker.find(it.processId); - if ( masterProxyWorker == id_worker.end() ) - return false; - if ( 
masterProxyWorker->second.priorityInfo.isExcluded ) - return true; - proxyClasses.push_back(masterProxyWorker->second.details); + // Get commit proxy classes + std::vector commitProxyClasses; + for (auto& it : dbi.client.commitProxies) { + auto commitProxyWorker = id_worker.find(it.processId); + if (commitProxyWorker == id_worker.end()) return false; + if (commitProxyWorker->second.priorityInfo.isExcluded) return true; + commitProxyClasses.push_back(commitProxyWorker->second.details); } // Get grv proxy classes @@ -1285,25 +1285,25 @@ public: if(oldLogRoutersFit < newLogRoutersFit) return false; // Check proxy/grvProxy/resolver fitness - RoleFitnessPair oldInFit(RoleFitness(proxyClasses, ProcessClass::Proxy), + RoleFitnessPair oldInFit(RoleFitness(commitProxyClasses, ProcessClass::CommitProxy), RoleFitness(grvProxyClasses, ProcessClass::GrvProxy), RoleFitness(resolverClasses, ProcessClass::Resolver)); - auto first_proxy = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::Proxy, - ProcessClass::ExcludeFit, db.config, id_used, true); + auto first_commit_proxy = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::CommitProxy, + ProcessClass::ExcludeFit, db.config, id_used, true); auto first_grv_proxy = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, db.config, id_used, true); auto first_resolver = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true); - auto proxies = - getWorkersForRoleInDatacenter(clusterControllerDcId, ProcessClass::Proxy, db.config.getDesiredProxies(), - db.config, id_used, first_proxy, true); + auto commit_proxies = getWorkersForRoleInDatacenter(clusterControllerDcId, ProcessClass::CommitProxy, + db.config.getDesiredCommitProxies(), db.config, id_used, + first_commit_proxy, true); auto grv_proxies = getWorkersForRoleInDatacenter(clusterControllerDcId, ProcessClass::GrvProxy, 
db.config.getDesiredGrvProxies(), db.config, id_used, first_grv_proxy, true); auto resolvers = getWorkersForRoleInDatacenter( clusterControllerDcId, ProcessClass::Resolver, db.config.getDesiredResolvers(), db.config, id_used, first_resolver, true ); - RoleFitnessPair newInFit(RoleFitness(proxies, ProcessClass::Proxy), + RoleFitnessPair newInFit(RoleFitness(commit_proxies, ProcessClass::CommitProxy), RoleFitness(grv_proxies, ProcessClass::GrvProxy), RoleFitness(resolvers, ProcessClass::Resolver)); if (oldInFit.proxy.betterFitness(newInFit.proxy) || oldInFit.grvProxy.betterFitness(newInFit.grvProxy) || @@ -1358,7 +1358,7 @@ public: if (tlog.present() && tlog.interf().filteredLocality.processId() == processId) return true; } } - for (const MasterProxyInterface& interf : dbInfo.client.masterProxies) { + for (const CommitProxyInterface& interf : dbInfo.client.commitProxies) { if (interf.processId == processId) return true; } for (const GrvProxyInterface& interf : dbInfo.client.grvProxies) { @@ -1393,7 +1393,7 @@ public: } } } - for (const MasterProxyInterface& interf : dbInfo.client.masterProxies) { + for (const CommitProxyInterface& interf : dbInfo.client.commitProxies) { ASSERT(interf.processId.present()); idUsed[interf.processId]++; } @@ -1967,7 +1967,7 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c .detail("Resolvers", req.resolvers.size()) .detail("RecoveryState", (int)req.recoveryState) .detail("RegistrationCount", req.registrationCount) - .detail("MasterProxies", req.masterProxies.size()) + .detail("CommitProxies", req.commitProxies.size()) .detail("GrvProxies", req.grvProxies.size()) .detail("RecoveryCount", req.recoveryCount) .detail("Stalled", req.recoveryStalled) @@ -2022,11 +2022,12 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c } // Construct the client information - if (db->clientInfo->get().masterProxies != req.masterProxies || db->clientInfo->get().grvProxies != req.grvProxies) { + 
if (db->clientInfo->get().commitProxies != req.commitProxies || + db->clientInfo->get().grvProxies != req.grvProxies) { isChanged = true; ClientDBInfo clientInfo; clientInfo.id = deterministicRandom()->randomUniqueID(); - clientInfo.masterProxies = req.masterProxies; + clientInfo.commitProxies = req.commitProxies; clientInfo.grvProxies = req.grvProxies; clientInfo.clientTxnInfoSampleRate = db->clientInfo->get().clientTxnInfoSampleRate; clientInfo.clientTxnInfoSizeLimit = db->clientInfo->get().clientTxnInfoSizeLimit; diff --git a/fdbserver/MasterProxyServer.actor.cpp b/fdbserver/CommitProxyServer.actor.cpp similarity index 95% rename from fdbserver/MasterProxyServer.actor.cpp rename to fdbserver/CommitProxyServer.actor.cpp index 7d8a36a66a..cb9a8c9486 100644 --- a/fdbserver/MasterProxyServer.actor.cpp +++ b/fdbserver/CommitProxyServer.actor.cpp @@ -1,5 +1,5 @@ /* - * MasterProxyServer.actor.cpp + * CommitProxyServer.actor.cpp * * This source file is part of the FoundationDB open source project * @@ -25,7 +25,7 @@ #include "fdbclient/Atomic.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/Knobs.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/SystemData.h" #include "fdbrpc/sim_validation.h" @@ -42,7 +42,6 @@ #include "fdbserver/ProxyCommitData.actor.h" #include "fdbserver/RatekeeperInterface.h" #include "fdbserver/RecoveryState.h" -#include "fdbserver/ServerDBInfo.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/WorkerInterface.actor.h" #include "flow/ActorCollection.h" @@ -229,7 +228,7 @@ ACTOR Future commitBatcher(ProxyCommitData *commitData, PromiseStreamstats.txnCommitIn; if(req.debugID.present()) { - g_traceBatch.addEvent("CommitDebug", req.debugID.get().first(), "MasterProxyServer.batcher"); + g_traceBatch.addEvent("CommitDebug", req.debugID.get().first(), "CommitProxyServer.batcher"); } if(!batch.size()) { @@ -512,11 +511,7 @@ void 
CommitBatchContext::setupTraceBatch() { } if (debugID.present()) { - g_traceBatch.addEvent( - "CommitDebug", - debugID.get().first(), - "MasterProxyServer.commitBatch.Before" - ); + g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "CommitProxyServer.commitBatch.Before"); } } @@ -546,10 +541,8 @@ ACTOR Future preresolutionProcessing(CommitBatchContext* self) { ); if (debugID.present()) { - g_traceBatch.addEvent( - "CommitDebug", debugID.get().first(), - "MasterProxyServer.commitBatch.GettingCommitVersion" - ); + g_traceBatch.addEvent("CommitDebug", debugID.get().first(), + "CommitProxyServer.commitBatch.GettingCommitVersion"); } GetCommitVersionRequest req(self->span.context, pProxyCommitData->commitVersionRequestNumber++, @@ -577,10 +570,7 @@ ACTOR Future preresolutionProcessing(CommitBatchContext* self) { //TraceEvent("ProxyGotVer", pProxyContext->dbgid).detail("Commit", commitVersion).detail("Prev", prevVersion); if (debugID.present()) { - g_traceBatch.addEvent( - "CommitDebug", debugID.get().first(), - "MasterProxyServer.commitBatch.GotCommitVersion" - ); + g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "CommitProxyServer.commitBatch.GotCommitVersion"); } return Void(); @@ -639,10 +629,8 @@ ACTOR Future getResolution(CommitBatchContext* self) { self->resolution.swap(*const_cast*>(&resolutionResp)); if (self->debugID.present()) { - g_traceBatch.addEvent( - "CommitDebug", self->debugID.get().first(), - "MasterProxyServer.commitBatch.AfterResolution" - ); + g_traceBatch.addEvent("CommitDebug", self->debugID.get().first(), + "CommitProxyServer.commitBatch.AfterResolution"); } return Void(); @@ -972,10 +960,8 @@ ACTOR Future postResolution(CommitBatchContext* self) { pProxyCommitData->stats.txnCommitResolved += trs.size(); if (debugID.present()) { - g_traceBatch.addEvent( - "CommitDebug", debugID.get().first(), - "MasterProxyServer.commitBatch.ProcessingMutations" - ); + g_traceBatch.addEvent("CommitDebug", debugID.get().first(), + 
"CommitProxyServer.commitBatch.ProcessingMutations"); } self->isMyFirstBatch = !pProxyCommitData->version; @@ -1041,7 +1027,8 @@ ACTOR Future postResolution(CommitBatchContext* self) { self->msg = self->storeCommits.back().first.get(); if (self->debugID.present()) - g_traceBatch.addEvent("CommitDebug", self->debugID.get().first(), "MasterProxyServer.commitBatch.AfterStoreCommits"); + g_traceBatch.addEvent("CommitDebug", self->debugID.get().first(), + "CommitProxyServer.commitBatch.AfterStoreCommits"); // txnState (transaction subsystem state) tag: message extracted from log adapter bool firstMessage = true; @@ -1129,7 +1116,7 @@ ACTOR Future reply(CommitBatchContext* self) { //TraceEvent("ProxyPushed", pProxyCommitData->dbgid).detail("PrevVersion", prevVersion).detail("Version", commitVersion); if (debugID.present()) - g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.AfterLogPush"); + g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "CommitProxyServer.commitBatch.AfterLogPush"); for (auto &p : self->storeCommits) { ASSERT(!p.second.isReady()); @@ -1328,7 +1315,8 @@ ACTOR static Future doKeyServerLocationRequest( GetKeyServerLocationsReque return Void(); } -ACTOR static Future readRequestServer( MasterProxyInterface proxy, PromiseStream> addActor, ProxyCommitData* commitData ) { +ACTOR static Future readRequestServer(CommitProxyInterface proxy, PromiseStream> addActor, + ProxyCommitData* commitData) { loop { GetKeyServerLocationsRequest req = waitNext(proxy.getKeyServersLocations.getFuture()); //WARNING: this code is run at a high priority, so it needs to do as little work as possible @@ -1344,7 +1332,7 @@ ACTOR static Future readRequestServer( MasterProxyInterface proxy, Promise } } -ACTOR static Future rejoinServer( MasterProxyInterface proxy, ProxyCommitData* commitData ) { +ACTOR static Future rejoinServer(CommitProxyInterface proxy, ProxyCommitData* commitData) { // We can't respond to these requests until we 
have valid txnStateStore wait(commitData->validState.getFuture()); @@ -1413,8 +1401,7 @@ ACTOR static Future rejoinServer( MasterProxyInterface proxy, ProxyCommitD } } -ACTOR Future ddMetricsRequestServer(MasterProxyInterface proxy, Reference> db) -{ +ACTOR Future ddMetricsRequestServer(CommitProxyInterface proxy, Reference> db) { loop { choose { when(state GetDDMetricsRequest req = waitNext(proxy.getDDMetrics.getFuture())) @@ -1496,17 +1483,17 @@ ACTOR Future monitorRemoteCommitted(ProxyCommitData* self) { } ACTOR Future proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* commitData) { - TraceEvent("SnapMasterProxy_SnapReqEnter") - .detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID); + TraceEvent("SnapCommitProxy_SnapReqEnter") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID); try { // whitelist check ExecCmdValueString execArg(snapReq.snapPayload); StringRef binPath = execArg.getBinaryPath(); if (!isWhitelisted(commitData->whitelistedBinPathVec, binPath)) { - TraceEvent("SnapMasterProxy_WhiteListCheckFailed") - .detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID); + TraceEvent("SnapCommitProxy_WhiteListCheckFailed") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID); throw snap_path_not_whitelisted(); } // db fully recovered check @@ -1516,9 +1503,9 @@ ACTOR Future proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co // Currently, snapshot of old tlog generation is not // supported and hence failing the snapshot request until // cluster is fully_recovered. 
- TraceEvent("SnapMasterProxy_ClusterNotFullyRecovered") - .detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID); + TraceEvent("SnapCommitProxy_ClusterNotFullyRecovered") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID); throw snap_not_fully_recovered_unsupported(); } @@ -1531,9 +1518,9 @@ ACTOR Future proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co // FIXME: logAntiQuorum not supported, remove it later, // In version2, we probably don't need this limtiation, but this needs to be tested. if (logAntiQuorum > 0) { - TraceEvent("SnapMasterProxy_LogAnitQuorumNotSupported") - .detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID); + TraceEvent("SnapCommitProxy_LogAnitQuorumNotSupported") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID); throw snap_log_anti_quorum_unsupported(); } @@ -1547,32 +1534,32 @@ ACTOR Future proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co try { wait(throwErrorOr(ddSnapReq)); } catch (Error& e) { - TraceEvent("SnapMasterProxy_DDSnapResponseError") - .detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID) - .error(e, true /*includeCancelled*/ ); + TraceEvent("SnapCommitProxy_DDSnapResponseError") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID) + .error(e, true /*includeCancelled*/); throw e; } snapReq.reply.send(Void()); } catch (Error& e) { - TraceEvent("SnapMasterProxy_SnapReqError") - .detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID) - .error(e, true /*includeCancelled*/); + TraceEvent("SnapCommitProxy_SnapReqError") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID) + .error(e, true /*includeCancelled*/); if (e.code() != error_code_operation_cancelled) { snapReq.reply.sendError(e); } else { throw e; } } - TraceEvent("SnapMasterProxy_SnapReqExit") - 
.detail("SnapPayload", snapReq.snapPayload) - .detail("SnapUID", snapReq.snapUID); + TraceEvent("SnapCommitProxy_SnapReqExit") + .detail("SnapPayload", snapReq.snapPayload) + .detail("SnapUID", snapReq.snapUID); return Void(); } ACTOR Future proxyCheckSafeExclusion(Reference> db, ExclusionSafetyCheckRequest req) { - TraceEvent("SafetyCheckMasterProxyBegin"); + TraceEvent("SafetyCheckCommitProxyBegin"); state ExclusionSafetyCheckReply reply(false); if (!db->get().distributor.present()) { TraceEvent(SevWarnAlways, "DataDistributorNotPresent").detail("Operation", "ExclusionSafetyCheck"); @@ -1586,7 +1573,7 @@ ACTOR Future proxyCheckSafeExclusion(Reference> db, DistributorExclusionSafetyCheckReply _reply = wait(throwErrorOr(safeFuture)); reply.safe = _reply.safe; } catch (Error& e) { - TraceEvent("SafetyCheckMasterProxyResponseError").error(e); + TraceEvent("SafetyCheckCommitProxyResponseError").error(e); if (e.code() != error_code_operation_cancelled) { req.reply.sendError(e); return Void(); @@ -1594,7 +1581,7 @@ ACTOR Future proxyCheckSafeExclusion(Reference> db, throw e; } } - TraceEvent("SafetyCheckMasterProxyFinish"); + TraceEvent("SafetyCheckCommitProxyFinish"); req.reply.send(reply); return Void(); } @@ -1631,15 +1618,10 @@ ACTOR Future reportTxnTagCommitCost(UID myID, Reference masterProxyServerCore( - MasterProxyInterface proxy, - MasterInterface master, - Reference> db, - LogEpoch epoch, - Version recoveryTransactionVersion, - bool firstProxy, - std::string whitelistBinPaths) -{ +ACTOR Future commitProxyServerCore(CommitProxyInterface proxy, MasterInterface master, + Reference> db, LogEpoch epoch, + Version recoveryTransactionVersion, bool firstProxy, + std::string whitelistBinPaths) { state ProxyCommitData commitData(proxy.id(), master, proxy.getConsistentReadVersion, recoveryTransactionVersion, proxy.commit, db, firstProxy); state Future sequenceFuture = (Sequence)0; @@ -1657,9 +1639,9 @@ ACTOR Future masterProxyServerCore( state GetHealthMetricsReply 
detailedHealthMetricsReply; addActor.send( waitFailureServer(proxy.waitFailure.getFuture()) ); - addActor.send( traceRole(Role::MASTER_PROXY, proxy.id()) ); + addActor.send(traceRole(Role::COMMIT_PROXY, proxy.id())); - //TraceEvent("ProxyInit1", proxy.id()); + //TraceEvent("CommitProxyInit1", proxy.id()); // Wait until we can load the "real" logsystem, since we don't support switching them currently while (!(commitData.db->get().master.id() == master.id() && commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION)) { @@ -1701,7 +1683,7 @@ ACTOR Future masterProxyServerCore( (int)std::min(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MAX, std::max(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_MIN, SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE * - pow(commitData.db->get().client.masterProxies.size(), + pow(commitData.db->get().client.commitProxies.size(), SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER))); commitBatcherActor = commitBatcher(&commitData, batchedCommits, proxy.commit.getFuture(), commitBatchByteLimit, commitBatchesMemoryLimit); @@ -1723,7 +1705,7 @@ ACTOR Future masterProxyServerCore( //WARNING: this code is run at a high priority, so it needs to do as little work as possible const vector &trs = batchedRequests.first; int batchBytes = batchedRequests.second; - //TraceEvent("MasterProxyCTR", proxy.id()).detail("CommitTransactions", trs.size()).detail("TransactionRate", transactionRate).detail("TransactionQueue", transactionQueue.size()).detail("ReleasedTransactionCount", transactionCount); + //TraceEvent("CommitProxyCTR", proxy.id()).detail("CommitTransactions", trs.size()).detail("TransactionRate", transactionRate).detail("TransactionQueue", transactionQueue.size()).detail("ReleasedTransactionCount", transactionCount); if (trs.size() || (commitData.db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS && now() - lastCommit >= SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL)) { lastCommit = now(); @@ -1824,27 +1806,27 @@ 
ACTOR Future masterProxyServerCore( } } -ACTOR Future checkRemoved(Reference> db, uint64_t recoveryCount, MasterProxyInterface myInterface) { +ACTOR Future checkRemoved(Reference> db, uint64_t recoveryCount, + CommitProxyInterface myInterface) { loop{ - if (db->get().recoveryCount >= recoveryCount && !std::count(db->get().client.masterProxies.begin(), db->get().client.masterProxies.end(), myInterface)) { + if (db->get().recoveryCount >= recoveryCount && + !std::count(db->get().client.commitProxies.begin(), db->get().client.commitProxies.end(), myInterface)) { throw worker_removed(); } wait(db->onChange()); } } -ACTOR Future masterProxyServer( - MasterProxyInterface proxy, - InitializeMasterProxyRequest req, - Reference> db, - std::string whitelistBinPaths) -{ +ACTOR Future commitProxyServer(CommitProxyInterface proxy, InitializeCommitProxyRequest req, + Reference> db, std::string whitelistBinPaths) { try { - state Future core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths); + state Future core = + commitProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, + req.firstProxy, whitelistBinPaths); wait(core || checkRemoved(db, req.recoveryCount, proxy)); } catch (Error& e) { - TraceEvent("MasterProxyTerminated", proxy.id()).error(e, true); + TraceEvent("CommitProxyTerminated", proxy.id()).error(e, true); if (e.code() != error_code_worker_removed && e.code() != error_code_tlog_stopped && e.code() != error_code_master_tlog_failed && e.code() != error_code_coordinators_changed && diff --git a/fdbserver/GrvProxyServer.actor.cpp b/fdbserver/GrvProxyServer.actor.cpp index 8e09d67dea..c169b9422b 100644 --- a/fdbserver/GrvProxyServer.actor.cpp +++ b/fdbserver/GrvProxyServer.actor.cpp @@ -21,7 +21,7 @@ #include "fdbclient/Notified.h" #include "fdbserver/LogSystem.h" #include "fdbserver/LogSystemDiskQueueAdapter.h" -#include "fdbclient/MasterProxyInterface.h" 
+#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/GrvProxyInterface.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/WorkerInterface.actor.h" @@ -443,13 +443,13 @@ ACTOR Future sendGrvReplies(Future replyFuture, std:: TEST(true); // Auto TPS rate is unlimited } else { - TEST(true); // Proxy returning tag throttle + TEST(true); // GRV proxy returning tag throttle reply.tagThrottleInfo[tag.first] = tagItr->second; } } else { // This isn't required, but we might as well - TEST(true); // Proxy expiring tag throttle + TEST(true); // GRV proxy expiring tag throttle priorityThrottledTags.erase(tagItr); } } diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 6122ca13ac..3f2ca37505 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -38,7 +38,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi init( MAX_VERSIONS_IN_FLIGHT_FORCED, 6e5 * VERSIONS_PER_SECOND ); //one week of versions init( MAX_READ_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS = VERSIONS_PER_SECOND; else if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS = std::max(1, 0.1 * VERSIONS_PER_SECOND); else if( randomize && BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = 10 * VERSIONS_PER_SECOND; init( MAX_WRITE_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_WRITE_TRANSACTION_LIFE_VERSIONS=std::max(1, 1 * VERSIONS_PER_SECOND); - init( MAX_COMMIT_BATCH_INTERVAL, 2.0 ); if( randomize && BUGGIFY ) MAX_COMMIT_BATCH_INTERVAL = 0.5; // Each master proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly + init( MAX_COMMIT_BATCH_INTERVAL, 2.0 ); if( randomize && BUGGIFY ) MAX_COMMIT_BATCH_INTERVAL = 0.5; // Each commit proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly MAX_COMMIT_BATCH_INTERVAL = 
std::min(MAX_COMMIT_BATCH_INTERVAL, MAX_READ_TRANSACTION_LIFE_VERSIONS/double(2*VERSIONS_PER_SECOND)); // Ensure that the proxy commits 2 times every MAX_READ_TRANSACTION_LIFE_VERSIONS, otherwise the master will not give out versions fast enough // TLogs @@ -328,7 +328,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi init( POLLING_FREQUENCY, 2.0 ); if( longLeaderElection ) POLLING_FREQUENCY = 8.0; init( HEARTBEAT_FREQUENCY, 0.5 ); if( longLeaderElection ) HEARTBEAT_FREQUENCY = 1.0; - // Master Proxy and GRV Proxy + // Commit Proxy and GRV Proxy init( START_TRANSACTION_BATCH_INTERVAL_MIN, 1e-6 ); init( START_TRANSACTION_BATCH_INTERVAL_MAX, 0.010 ); init( START_TRANSACTION_BATCH_INTERVAL_LATENCY_FRACTION, 0.5 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index e36de5f2eb..a2d58922fe 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -37,10 +37,11 @@ public: int64_t MAX_VERSIONS_IN_FLIGHT_FORCED; int64_t MAX_READ_TRANSACTION_LIFE_VERSIONS; int64_t MAX_WRITE_TRANSACTION_LIFE_VERSIONS; - double MAX_COMMIT_BATCH_INTERVAL; // Each master proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly + double MAX_COMMIT_BATCH_INTERVAL; // Each commit proxy generates a CommitTransactionBatchRequest at least this + // often, so that versions always advance smoothly // TLogs - double TLOG_TIMEOUT; // tlog OR master proxy failure - master's reaction time + double TLOG_TIMEOUT; // tlog OR commit proxy failure - master's reaction time double RECOVERY_TLOG_SMART_QUORUM_DELAY; // smaller might be better for bug amplification double TLOG_STORAGE_MIN_UPDATE_INTERVAL; double BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL; @@ -262,7 +263,7 @@ public: double POLLING_FREQUENCY; double HEARTBEAT_FREQUENCY; - // Master Proxy + // Commit Proxy double START_TRANSACTION_BATCH_INTERVAL_MIN; double START_TRANSACTION_BATCH_INTERVAL_MAX; double
START_TRANSACTION_BATCH_INTERVAL_LATENCY_FRACTION; diff --git a/fdbserver/Ratekeeper.actor.cpp b/fdbserver/Ratekeeper.actor.cpp index b27610ceb1..980b2bd806 100644 --- a/fdbserver/Ratekeeper.actor.cpp +++ b/fdbserver/Ratekeeper.actor.cpp @@ -527,7 +527,7 @@ struct RatekeeperLimits { {} }; -struct ProxyInfo { +struct CommitProxyInfo { int64_t totalTransactions; int64_t batchTransactions; uint64_t lastThrottledTagChangeId; @@ -535,7 +535,9 @@ struct ProxyInfo { double lastUpdateTime; double lastTagPushTime; - ProxyInfo() : totalTransactions(0), batchTransactions(0), lastUpdateTime(0), lastThrottledTagChangeId(0), lastTagPushTime(0) {} + CommitProxyInfo() + : totalTransactions(0), batchTransactions(0), lastUpdateTime(0), lastThrottledTagChangeId(0), lastTagPushTime(0) { + } }; struct RatekeeperData { @@ -545,7 +547,7 @@ struct RatekeeperData { Map storageQueueInfo; Map tlogQueueInfo; - std::map proxyInfo; + std::map commitProxyInfo; Smoother smoothReleasedTransactions, smoothBatchReleasedTransactions, smoothTotalDurableBytes; HealthMetrics healthMetrics; DatabaseConfiguration configuration; @@ -1260,31 +1262,31 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) { if (deterministicRandom()->random01() < 0.1) { std::string name = "RkUpdate" + limits->context; TraceEvent(name.c_str(), self->id) - .detail("TPSLimit", limits->tpsLimit) - .detail("Reason", limitReason) - .detail("ReasonServerID", reasonID==UID() ? 
std::string() : Traceable::toString(reasonID)) - .detail("ReleasedTPS", self->smoothReleasedTransactions.smoothRate()) - .detail("ReleasedBatchTPS", self->smoothBatchReleasedTransactions.smoothRate()) - .detail("TPSBasis", actualTps) - .detail("StorageServers", sscount) - .detail("GrvProxies", self->proxyInfo.size()) - .detail("TLogs", tlcount) - .detail("WorstFreeSpaceStorageServer", worstFreeSpaceStorageServer) - .detail("WorstFreeSpaceTLog", worstFreeSpaceTLog) - .detail("WorstStorageServerQueue", worstStorageQueueStorageServer) - .detail("LimitingStorageServerQueue", limitingStorageQueueStorageServer) - .detail("WorstTLogQueue", worstStorageQueueTLog) - .detail("TotalDiskUsageBytes", totalDiskUsageBytes) - .detail("WorstStorageServerVersionLag", worstVersionLag) - .detail("LimitingStorageServerVersionLag", limitingVersionLag) - .detail("WorstStorageServerDurabilityLag", worstDurabilityLag) - .detail("LimitingStorageServerDurabilityLag", limitingDurabilityLag) - .detail("TagsAutoThrottled", self->throttledTags.autoThrottleCount()) - .detail("TagsAutoThrottledBusyRead", self->throttledTags.busyReadTagCount) - .detail("TagsAutoThrottledBusyWrite", self->throttledTags.busyWriteTagCount) - .detail("TagsManuallyThrottled", self->throttledTags.manualThrottleCount()) - .detail("AutoThrottlingEnabled", self->autoThrottlingEnabled) - .trackLatest(name); + .detail("TPSLimit", limits->tpsLimit) + .detail("Reason", limitReason) + .detail("ReasonServerID", reasonID == UID() ? 
std::string() : Traceable::toString(reasonID)) + .detail("ReleasedTPS", self->smoothReleasedTransactions.smoothRate()) + .detail("ReleasedBatchTPS", self->smoothBatchReleasedTransactions.smoothRate()) + .detail("TPSBasis", actualTps) + .detail("StorageServers", sscount) + .detail("GrvProxies", self->commitProxyInfo.size()) + .detail("TLogs", tlcount) + .detail("WorstFreeSpaceStorageServer", worstFreeSpaceStorageServer) + .detail("WorstFreeSpaceTLog", worstFreeSpaceTLog) + .detail("WorstStorageServerQueue", worstStorageQueueStorageServer) + .detail("LimitingStorageServerQueue", limitingStorageQueueStorageServer) + .detail("WorstTLogQueue", worstStorageQueueTLog) + .detail("TotalDiskUsageBytes", totalDiskUsageBytes) + .detail("WorstStorageServerVersionLag", worstVersionLag) + .detail("LimitingStorageServerVersionLag", limitingVersionLag) + .detail("WorstStorageServerDurabilityLag", worstDurabilityLag) + .detail("LimitingStorageServerDurabilityLag", limitingDurabilityLag) + .detail("TagsAutoThrottled", self->throttledTags.autoThrottleCount()) + .detail("TagsAutoThrottledBusyRead", self->throttledTags.busyReadTagCount) + .detail("TagsAutoThrottledBusyWrite", self->throttledTags.busyWriteTagCount) + .detail("TagsManuallyThrottled", self->throttledTags.manualThrottleCount()) + .detail("AutoThrottlingEnabled", self->autoThrottlingEnabled) + .trackLatest(name); } } @@ -1369,9 +1371,9 @@ ACTOR Future ratekeeper(RatekeeperInterface rkInterf, Reference SERVER_KNOBS->LAST_LIMITED_RATIO * self.batchLimits.tpsLimit; double tooOld = now() - 1.0; - for(auto p=self.proxyInfo.begin(); p!=self.proxyInfo.end(); ) { + for (auto p = self.commitProxyInfo.begin(); p != self.commitProxyInfo.end();) { if (p->second.lastUpdateTime < tooOld) - p = self.proxyInfo.erase(p); + p = self.commitProxyInfo.erase(p); else ++p; } @@ -1380,7 +1382,7 @@ ACTOR Future ratekeeper(RatekeeperInterface rkInterf, Reference 0) { self.smoothReleasedTransactions.addDelta( req.totalReleasedTransactions - 
p.totalTransactions ); @@ -1397,8 +1399,8 @@ ACTOR Future ratekeeper(RatekeeperInterface rkInterf, ReferenceMETRIC_UPDATE_RATE; if(p.lastThrottledTagChangeId != self.throttledTagChangeId || now() > p.lastTagPushTime + SERVER_KNOBS->TAG_THROTTLE_PUSH_INTERVAL) { diff --git a/fdbserver/Resolver.actor.cpp b/fdbserver/Resolver.actor.cpp index 8a2cac8171..cdac445a40 100644 --- a/fdbserver/Resolver.actor.cpp +++ b/fdbserver/Resolver.actor.cpp @@ -243,7 +243,7 @@ ACTOR Future resolveBatch( // SOMEDAY: This is O(n) in number of proxies. O(log n) solution using appropriate data structure? Version oldestProxyVersion = req.version; for(auto itr = self->proxyInfoMap.begin(); itr != self->proxyInfoMap.end(); ++itr) { - //TraceEvent("ResolveBatchProxyVersion", self->dbgid).detail("Proxy", itr->first).detail("Version", itr->second.lastVersion); + //TraceEvent("ResolveBatchProxyVersion", self->dbgid).detail("CommitProxy", itr->first).detail("Version", itr->second.lastVersion); if(itr->first.isValid()) { // Don't consider the first master request oldestProxyVersion = std::min(itr->second.lastVersion, oldestProxyVersion); } @@ -311,7 +311,7 @@ ACTOR Future resolverCore( ResolverInterface resolver, InitializeResolverRequest initReq) { - state Reference self( new Resolver(resolver.id(), initReq.proxyCount, initReq.resolverCount) ); + state Reference self(new Resolver(resolver.id(), initReq.commitProxyCount, initReq.resolverCount)); state ActorCollection actors(false); state Future doPollMetrics = self->resolverCount > 1 ? Void() : Future(Never()); actors.add( waitFailureServer(resolver.waitFailure.getFuture()) ); diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 367514ef0a..8a718982bc 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -733,7 +733,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR bool generateFearless = simple ? 
false : (minimumRegions > 1 || deterministicRandom()->random01() < 0.5); datacenters = simple ? 1 : ( generateFearless ? ( minimumReplication > 0 || deterministicRandom()->random01() < 0.5 ? 4 : 6 ) : deterministicRandom()->randomInt( 1, 4 ) ); if (deterministicRandom()->random01() < 0.25) db.desiredTLogCount = deterministicRandom()->randomInt(1,7); - if (deterministicRandom()->random01() < 0.25) db.proxyCount = deterministicRandom()->randomInt(1, 7); + if (deterministicRandom()->random01() < 0.25) db.commitProxyCount = deterministicRandom()->randomInt(1, 7); if (deterministicRandom()->random01() < 0.25) db.grvProxyCount = deterministicRandom()->randomInt(1, 4); if (deterministicRandom()->random01() < 0.25) db.resolverCount = deterministicRandom()->randomInt(1,7); int storage_engine_type = deterministicRandom()->randomInt(0, 4); @@ -770,7 +770,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR // set_config("memory-radixtree-beta"); if(simple) { db.desiredTLogCount = 1; - db.proxyCount = 1; + db.commitProxyCount = 1; db.grvProxyCount = 1; db.resolverCount = 1; } diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 7759c193a4..6a2f674c97 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -574,7 +574,7 @@ struct RolesInfo { *pMetricVersion = metricVersion; return roles.insert( std::make_pair(iface.address(), obj ))->second; } - JsonBuilderObject& addRole(std::string const& role, MasterProxyInterface& iface, EventMap const& metrics) { + JsonBuilderObject& addRole(std::string const& role, CommitProxyInterface& iface, EventMap const& metrics) { JsonBuilderObject obj; obj["id"] = iface.id().shortString(); obj["role"] = role; @@ -646,11 +646,10 @@ ACTOR static Future processStatusFetcher( WorkerEvents mMetrics, WorkerEvents nMetrics, WorkerEvents errors, WorkerEvents traceFileOpenErrors, WorkerEvents programStarts, std::map> processIssues, vector> storageServers, - vector> tLogs, - vector> 
proxies, - vector> grvProxies, - ServerCoordinators coordinators, Database cx, Optional configuration, - Optional healthyZone, std::set* incomplete_reasons) { + vector> tLogs, vector> commitProxies, + vector> grvProxies, ServerCoordinators coordinators, Database cx, + Optional configuration, Optional healthyZone, + std::set* incomplete_reasons) { state JsonBuilderObject processMap; @@ -736,9 +735,9 @@ ACTOR static Future processStatusFetcher( roles.addCoordinatorRole(coordinator); } - state std::vector>::iterator proxy; - for(proxy = proxies.begin(); proxy != proxies.end(); ++proxy) { - roles.addRole( "proxy", proxy->first, proxy->second ); + state std::vector>::iterator commit_proxy; + for (commit_proxy = commitProxies.begin(); commit_proxy != commitProxies.end(); ++commit_proxy) { + roles.addRole("commit_proxy", commit_proxy->first, commit_proxy->second); wait(yield()); } @@ -1064,14 +1063,14 @@ ACTOR static Future recoveryStateStatusFetcher(WorkerDetails // Add additional metadata for certain statuses if (mStatusCode == RecoveryStatus::recruiting_transaction_servers) { int requiredLogs = atoi( md.getValue("RequiredTLogs").c_str() ); - int requiredProxies = atoi( md.getValue("RequiredProxies").c_str() ); + int requiredProxies = atoi(md.getValue("RequiredCommitProxies").c_str()); int requiredGrvProxies = atoi(md.getValue("RequiredGrvProxies").c_str()); int requiredResolvers = atoi( md.getValue("RequiredResolvers").c_str() ); //int requiredProcesses = std::max(requiredLogs, std::max(requiredResolvers, requiredProxies)); //int requiredMachines = std::max(requiredLogs, 1); message["required_logs"] = requiredLogs; - message["required_proxies"] = requiredProxies; + message["required_commit_proxies"] = requiredProxies; message["required_grv_proxies"] = requiredGrvProxies; message["required_resolvers"] = requiredResolvers; } else if (mStatusCode == RecoveryStatus::locking_old_transaction_servers) { @@ -1669,9 +1668,11 @@ ACTOR static Future>> getTLogsAndMetri return 
results; } -ACTOR static Future>> getProxiesAndMetrics(Reference> db, std::unordered_map address_workers) { - vector> results = wait(getServerMetrics( - db->get().client.masterProxies, address_workers, std::vector{ "CommitLatencyMetrics", "CommitLatencyBands" })); +ACTOR static Future>> getCommitProxiesAndMetrics( + Reference> db, std::unordered_map address_workers) { + vector> results = + wait(getServerMetrics(db->get().client.commitProxies, address_workers, + std::vector{ "CommitLatencyMetrics", "CommitLatencyBands" })); return results; } @@ -1755,16 +1756,18 @@ ACTOR static Future workloadStatusFetcher(Reference> proxyStatFutures; + state vector> commitProxyStatFutures; state vector> grvProxyStatFutures; std::map workersMap; for (auto const& w : workers) { workersMap[w.interf.address()] = w; } - for (auto &p : db->get().client.masterProxies) { + for (auto& p : db->get().client.commitProxies) { auto worker = getWorker(workersMap, p.address()); if (worker.present()) - proxyStatFutures.push_back(timeoutError(worker.get().interf.eventLogRequest.getReply(EventLogRequest(LiteralStringRef("ProxyMetrics"))), 1.0)); + commitProxyStatFutures.push_back(timeoutError( + worker.get().interf.eventLogRequest.getReply(EventLogRequest(LiteralStringRef("ProxyMetrics"))), + 1.0)); else throw all_alternatives_failed(); // We need data from all proxies for this result to be trustworthy } @@ -1775,7 +1778,7 @@ ACTOR static Future workloadStatusFetcher(Reference proxyStats = wait(getAll(proxyStatFutures)); + state vector commitProxyStats = wait(getAll(commitProxyStatFutures)); state vector grvProxyStats = wait(getAll(grvProxyStatFutures)); StatusCounter txnStartOut; @@ -1798,14 +1801,14 @@ ACTOR static Future workloadStatusFetcher(Reference clusterGetStatus( getProcessIssuesAsMessages(workerIssues); state vector> storageServers; state vector> tLogs; - state vector> proxies; + state vector> commit_proxies; state vector> grvProxies; state JsonBuilderObject qos; state JsonBuilderObject 
data_overlay; @@ -2504,7 +2507,8 @@ ACTOR Future clusterGetStatus( state Future>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers, rkWorker)); state Future>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers)); - state Future>>> proxyFuture = errorOr(getProxiesAndMetrics(db, address_workers)); + state Future>>> commitProxyFuture = + errorOr(getCommitProxiesAndMetrics(db, address_workers)); state Future>>> grvProxyFuture = errorOr(getGrvProxiesAndMetrics(db, address_workers)); state int minReplicasRemaining = -1; @@ -2587,13 +2591,13 @@ ACTOR Future clusterGetStatus( messages.push_back(JsonBuilder::makeMessage("log_servers_error", "Timed out trying to retrieve log servers.")); } - // ...also proxies - ErrorOr>> _proxies = wait(proxyFuture); - if (_proxies.present()) { - proxies = _proxies.get(); - } - else { - messages.push_back(JsonBuilder::makeMessage("proxies_error", "Timed out trying to retrieve proxies.")); + // ...also commit proxies + ErrorOr>> _commit_proxies = wait(commitProxyFuture); + if (_commit_proxies.present()) { + commit_proxies = _commit_proxies.get(); + } else { + messages.push_back( + JsonBuilder::makeMessage("commit_proxies_error", "Timed out trying to retrieve commit proxies.")); } // ...also grv proxies @@ -2614,12 +2618,10 @@ ACTOR Future clusterGetStatus( statusObj["layers"] = layers; } - JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, networkMetrics, - latestError, traceFileOpenErrors, programStarts, - processIssues, storageServers, tLogs, proxies, - grvProxies, coordinators, cx, configuration, - loadResult.present() ? 
loadResult.get().healthyZone : Optional(), - &status_incomplete_reasons)); + JsonBuilderObject processStatus = wait(processStatusFetcher( + db, workers, pMetrics, mMetrics, networkMetrics, latestError, traceFileOpenErrors, programStarts, + processIssues, storageServers, tLogs, commit_proxies, grvProxies, coordinators, cx, configuration, + loadResult.present() ? loadResult.get().healthyZone : Optional(), &status_incomplete_reasons)); statusObj["processes"] = processStatus; statusObj["clients"] = clientStatusFetcher(clientStatus); diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index bf92f6afe5..74768844df 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -46,7 +46,7 @@ struct WorkerInterface { LocalityData locality; RequestStream< struct InitializeTLogRequest > tLog; RequestStream< struct RecruitMasterRequest > master; - RequestStream< struct InitializeMasterProxyRequest > masterProxy; + RequestStream commitProxy; RequestStream< struct InitializeGrvProxyRequest > grvProxy; RequestStream< struct InitializeDataDistributorRequest > dataDistributor; RequestStream< struct InitializeRatekeeperRequest > ratekeeper; @@ -81,7 +81,7 @@ struct WorkerInterface { clientInterface.initEndpoints(); tLog.getEndpoint( TaskPriority::Worker ); master.getEndpoint( TaskPriority::Worker ); - masterProxy.getEndpoint( TaskPriority::Worker ); + commitProxy.getEndpoint(TaskPriority::Worker); grvProxy.getEndpoint( TaskPriority::Worker ); resolver.getEndpoint( TaskPriority::Worker ); logRouter.getEndpoint( TaskPriority::Worker ); @@ -93,7 +93,10 @@ struct WorkerInterface { template void serialize(Ar& ar) { - serializer(ar, clientInterface, locality, tLog, master, masterProxy, grvProxy, dataDistributor, ratekeeper, resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest, execReq, workerSnapReq, backup, 
updateServerDBInfo); + serializer(ar, clientInterface, locality, tLog, master, commitProxy, grvProxy, dataDistributor, ratekeeper, + resolver, storage, logRouter, debugPing, coordinationPing, waitFailure, setMetricsRate, + eventLogRequest, traceBatchDumpRequest, testerInterface, diskStoreRequest, execReq, workerSnapReq, + backup, updateServerDBInfo); } }; @@ -180,7 +183,7 @@ struct RegisterMasterRequest { UID id; LocalityData mi; LogSystemConfig logSystemConfig; - std::vector masterProxies; + std::vector commitProxies; std::vector grvProxies; std::vector resolvers; DBRecoveryCount recoveryCount; @@ -199,7 +202,7 @@ struct RegisterMasterRequest { if constexpr (!is_fb_function) { ASSERT(ar.protocolVersion().isValid()); } - serializer(ar, id, mi, logSystemConfig, masterProxies, grvProxies, resolvers, recoveryCount, registrationCount, + serializer(ar, id, mi, logSystemConfig, commitProxies, grvProxies, resolvers, recoveryCount, registrationCount, configuration, priorCommittedLogServers, recoveryState, recoveryStalled, reply); } }; @@ -209,7 +212,7 @@ struct RecruitFromConfigurationReply { std::vector backupWorkers; std::vector tLogs; std::vector satelliteTLogs; - std::vector masterProxies; + std::vector commitProxies; std::vector grvProxies; std::vector resolvers; std::vector storageServers; @@ -221,7 +224,7 @@ struct RecruitFromConfigurationReply { template void serialize(Ar& ar) { - serializer(ar, tLogs, satelliteTLogs, masterProxies, grvProxies, resolvers, storageServers, oldLogRouters, dcId, + serializer(ar, tLogs, satelliteTLogs, commitProxies, grvProxies, resolvers, storageServers, oldLogRouters, dcId, satelliteFallback, backupWorkers); } }; @@ -433,13 +436,13 @@ struct RecruitMasterRequest { } }; -struct InitializeMasterProxyRequest { +struct InitializeCommitProxyRequest { constexpr static FileIdentifier file_identifier = 10344153; MasterInterface master; uint64_t recoveryCount; Version recoveryTransactionVersion; bool firstProxy; - ReplyPromise reply; + 
ReplyPromise reply; template void serialize(Ar& ar) { @@ -488,13 +491,13 @@ struct InitializeRatekeeperRequest { struct InitializeResolverRequest { constexpr static FileIdentifier file_identifier = 7413317; uint64_t recoveryCount; - int proxyCount; + int commitProxyCount; int resolverCount; ReplyPromise reply; template void serialize(Ar& ar) { - serializer(ar, recoveryCount, proxyCount, resolverCount, reply); + serializer(ar, recoveryCount, commitProxyCount, resolverCount, reply); } }; @@ -672,7 +675,7 @@ struct Role { static const Role STORAGE_SERVER; static const Role TRANSACTION_LOG; static const Role SHARED_TRANSACTION_LOG; - static const Role MASTER_PROXY; + static const Role COMMIT_PROXY; static const Role GRV_PROXY; static const Role MASTER; static const Role RESOLVER; @@ -735,7 +738,7 @@ ACTOR Future storageServer(IKeyValueStore* persistentData, StorageServerIn Reference connFile ); // changes pssi->id() to be the recovered ID); // changes pssi->id() to be the recovered ID ACTOR Future masterServer(MasterInterface mi, Reference> db, Reference>> ccInterface, ServerCoordinators serverCoordinators, LifetimeToken lifetime, bool forceRecovery); -ACTOR Future masterProxyServer(MasterProxyInterface proxy, InitializeMasterProxyRequest req, +ACTOR Future commitProxyServer(CommitProxyInterface proxy, InitializeCommitProxyRequest req, Reference> db, std::string whitelistBinPaths); ACTOR Future grvProxyServer(GrvProxyInterface proxy, InitializeGrvProxyRequest req, Reference> db); ACTOR Future tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 5c4bd54279..514f7e5122 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -542,9 +542,9 @@ static void printUsage( const char *name, bool devhelp ) { " The default value is 2GiB. 
When specified without a unit,\n" " MiB is assumed.\n"); printf(" -c CLASS, --class CLASS\n" - " Machine class (valid options are storage, transaction,\n" - " resolution, proxy, master, test, unset, stateless, log, router,\n" - " and cluster_controller).\n"); + " Machine class (valid options are storage, transaction,\n" + " resolution, grv_proxy, proxy, master, test, unset, stateless, log, router,\n" + " and cluster_controller).\n"); #ifndef TLS_DISABLED printf(TLS_HELP); #endif @@ -2028,7 +2028,8 @@ int main(int argc, char* argv[]) { } static_assert( LBLocalityData::Present, "Storage server interface should be load balanced" ); - static_assert( LBLocalityData::Present, "Master proxy interface should be load balanced" ); + static_assert(LBLocalityData::Present, "Commit proxy interface should be load balanced"); + static_assert(LBLocalityData::Present, "GRV proxy interface should be load balanced"); static_assert( LBLocalityData::Present, "TLog interface should be load balanced" ); static_assert( !LBLocalityData::Present, "Master interface should not be load balanced" ); } diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index ce5c993d77..22b9495310 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -207,8 +207,8 @@ struct MasterData : NonCopyable, ReferenceCounted { return maxLocality + 1; } - std::vector masterProxies; - std::vector provisionalMasterProxies; + std::vector commitProxies; + std::vector provisionalCommitProxies; std::vector grvProxies; std::vector provisionalGrvProxies; std::vector resolvers; @@ -283,21 +283,24 @@ struct MasterData : NonCopyable, ReferenceCounted { ~MasterData() { if(txnStateStore) txnStateStore->close(); } }; -ACTOR Future newMasterProxies( Reference self, RecruitFromConfigurationReply recr ) { - vector> initializationReplies; - for( int i = 0; i < recr.masterProxies.size(); i++ ) { - InitializeMasterProxyRequest req; +ACTOR Future newCommitProxies(Reference self, 
RecruitFromConfigurationReply recr) { + vector> initializationReplies; + for (int i = 0; i < recr.commitProxies.size(); i++) { + InitializeCommitProxyRequest req; req.master = self->myInterface; req.recoveryCount = self->cstate.myDBState.recoveryCount + 1; req.recoveryTransactionVersion = self->recoveryTransactionVersion; req.firstProxy = i == 0; - TraceEvent("MasterProxyReplies",self->dbgid).detail("WorkerID", recr.masterProxies[i].id()); - initializationReplies.push_back( transformErrors( throwErrorOr( recr.masterProxies[i].masterProxy.getReplyUnlessFailedFor( req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) ); + TraceEvent("CommitProxyReplies", self->dbgid).detail("WorkerID", recr.commitProxies[i].id()); + initializationReplies.push_back( + transformErrors(throwErrorOr(recr.commitProxies[i].commitProxy.getReplyUnlessFailedFor( + req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); } - vector newRecruits = wait( getAll( initializationReplies ) ); - // It is required for the correctness of COMMIT_ON_FIRST_PROXY that self->proxies[0] is the firstProxy. - self->masterProxies = newRecruits; + vector newRecruits = wait(getAll(initializationReplies)); + // It is required for the correctness of COMMIT_ON_FIRST_PROXY that self->commitProxies[0] is the firstProxy. 
+ self->commitProxies = newRecruits; return Void(); } @@ -322,7 +325,7 @@ ACTOR Future newResolvers( Reference self, RecruitFromConfigur for( int i = 0; i < recr.resolvers.size(); i++ ) { InitializeResolverRequest req; req.recoveryCount = self->cstate.myDBState.recoveryCount + 1; - req.proxyCount = recr.masterProxies.size(); + req.commitProxyCount = recr.commitProxies.size(); req.resolverCount = recr.resolvers.size(); TraceEvent("ResolverReplies",self->dbgid).detail("WorkerID", recr.resolvers[i].id()); initializationReplies.push_back( transformErrors( throwErrorOr( recr.resolvers[i].resolver.getReplyUnlessFailedFor( req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) ); @@ -426,15 +429,15 @@ ACTOR Future newSeedServers( Reference self, RecruitFromConfig return Void(); } -Future waitProxyFailure( vector const& proxies ) { +Future waitCommitProxyFailure(vector const& commitProxies) { std::vector> failed; - for (auto proxy : proxies) { - failed.push_back(waitFailureClient(proxy.waitFailure, SERVER_KNOBS->TLOG_TIMEOUT, + for (auto commitProxy : commitProxies) { + failed.push_back(waitFailureClient(commitProxy.waitFailure, SERVER_KNOBS->TLOG_TIMEOUT, -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, /*trace=*/true)); } ASSERT( failed.size() >= 1 ); - return tagError(quorum( failed, 1 ), master_proxy_failed()); + return tagError(quorum(failed, 1), commit_proxy_failed()); } Future waitGrvProxyFailure( vector const& grvProxies ) { @@ -499,14 +502,14 @@ ACTOR Future updateLogsValue( Reference self, Database cx ) { } Future sendMasterRegistration(MasterData* self, LogSystemConfig const& logSystemConfig, - vector proxies, vector grvProxies, + vector proxies, vector grvProxies, vector resolvers, DBRecoveryCount recoveryCount, vector priorCommittedLogServers) { RegisterMasterRequest masterReq; masterReq.id = self->myInterface.id(); masterReq.mi = self->myInterface.locality; 
masterReq.logSystemConfig = logSystemConfig; - masterReq.masterProxies = proxies; + masterReq.commitProxies = proxies; masterReq.grvProxies = grvProxies; masterReq.resolvers = resolvers; masterReq.recoveryCount = recoveryCount; @@ -536,14 +539,14 @@ ACTOR Future updateRegistration( Reference self, ReferencecstateUpdated.isSet()) { - wait(sendMasterRegistration(self.getPtr(), logSystemConfig, self->provisionalMasterProxies, + wait(sendMasterRegistration(self.getPtr(), logSystemConfig, self->provisionalCommitProxies, self->provisionalGrvProxies, self->resolvers, self->cstate.myDBState.recoveryCount, self->cstate.prevDBState.getPriorCommittedLogServers())); } else { updateLogsKey = updateLogsValue(self, cx); - wait(sendMasterRegistration(self.getPtr(), logSystemConfig, self->masterProxies, self->grvProxies, self->resolvers, - self->cstate.myDBState.recoveryCount, vector())); + wait(sendMasterRegistration(self.getPtr(), logSystemConfig, self->commitProxies, self->grvProxies, + self->resolvers, self->cstate.myDBState.recoveryCount, vector())); } } } @@ -551,14 +554,15 @@ ACTOR Future updateRegistration( Reference self, Reference> provisionalMaster( Reference parent, Future activate ) { wait(activate); - // Register a fake master proxy (to be provided right here) to make ourselves available to clients - parent->provisionalMasterProxies = vector(1); - parent->provisionalMasterProxies[0].provisional = true; - parent->provisionalMasterProxies[0].initEndpoints(); + // Register a fake commit proxy (to be provided right here) to make ourselves available to clients + parent->provisionalCommitProxies = vector(1); + parent->provisionalCommitProxies[0].provisional = true; + parent->provisionalCommitProxies[0].initEndpoints(); parent->provisionalGrvProxies = vector(1); parent->provisionalGrvProxies[0].provisional = true; parent->provisionalGrvProxies[0].initEndpoints(); - state Future waitMasterProxyFailure = 
waitFailureServer(parent->provisionalMasterProxies[0].waitFailure.getFuture()); + state Future waitCommitProxyFailure = + waitFailureServer(parent->provisionalCommitProxies[0].waitFailure.getFuture()); state Future waitGrvProxyFailure = waitFailureServer(parent->provisionalGrvProxies[0].waitFailure.getFuture()); parent->registrationTrigger.trigger(); @@ -567,8 +571,8 @@ ACTOR Future> provisionalMaster( Reference metadataVersion = parent->txnStateStore->readValue(metadataVersionKey).get(); - // We respond to a minimal subset of the master proxy protocol. Our sole purpose is to receive a single write-only transaction - // which might repair our configuration, and return it. + // We respond to a minimal subset of the commit proxy protocol. Our sole purpose is to receive a single write-only + // transaction which might repair our configuration, and return it. loop choose { when ( GetReadVersionRequest req = waitNext( parent->provisionalGrvProxies[0].getConsistentReadVersion.getFuture() ) ) { if ( req.flags & GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY && parent->lastEpochEnd ) { @@ -580,7 +584,7 @@ ACTOR Future> provisionalMaster( ReferenceprovisionalMasterProxies[0].commit.getFuture() ) ) { + when(CommitTransactionRequest req = waitNext(parent->provisionalCommitProxies[0].commit.getFuture())) { req.reply.send(Never()); // don't reply (clients always get commit_unknown_result) auto t = &req.transaction; if (t->read_snapshot == parent->lastEpochEnd && //< So no transactions can fall between the read snapshot and the recovery transaction this (might) be merged with @@ -600,10 +604,11 @@ ACTOR Future> provisionalMaster( ReferenceprovisionalMasterProxies[0].getKeyServersLocations.getFuture() ) ) { + when(GetKeyServerLocationsRequest req = + waitNext(parent->provisionalCommitProxies[0].getKeyServersLocations.getFuture())) { req.reply.send(Never()); } - when ( wait( waitMasterProxyFailure ) ) { throw worker_removed(); } + when(wait(waitCommitProxyFailure)) { throw 
worker_removed(); } when ( wait( waitGrvProxyFailure ) ) { throw worker_removed(); } } } @@ -634,8 +639,8 @@ ACTOR Future>> recruitEverything( Refere .detail("Status", RecoveryStatus::names[RecoveryStatus::recruiting_transaction_servers]) .detail("RequiredTLogs", self->configuration.tLogReplicationFactor) .detail("DesiredTLogs", self->configuration.getDesiredLogs()) - .detail("RequiredProxies", 1) - .detail("DesiredProxies", self->configuration.getDesiredProxies()) + .detail("RequiredCommitProxies", 1) + .detail("DesiredCommitProxies", self->configuration.getDesiredCommitProxies()) .detail("RequiredGrvProxies", 1) .detail("DesiredGrvProxies", self->configuration.getDesiredGrvProxies()) .detail("RequiredResolvers", 1) @@ -664,20 +669,20 @@ ACTOR Future>> recruitEverything( Refere self->backupWorkers.swap(recruits.backupWorkers); TraceEvent("MasterRecoveryState", self->dbgid) - .detail("StatusCode", RecoveryStatus::initializing_transaction_servers) - .detail("Status", RecoveryStatus::names[RecoveryStatus::initializing_transaction_servers]) - .detail("MasterProxies", recruits.masterProxies.size()) - .detail("GrvProxies", recruits.grvProxies.size()) - .detail("TLogs", recruits.tLogs.size()) - .detail("Resolvers", recruits.resolvers.size()) - .detail("BackupWorkers", self->backupWorkers.size()) - .trackLatest("MasterRecoveryState"); + .detail("StatusCode", RecoveryStatus::initializing_transaction_servers) + .detail("Status", RecoveryStatus::names[RecoveryStatus::initializing_transaction_servers]) + .detail("CommitProxies", recruits.commitProxies.size()) + .detail("GrvProxies", recruits.grvProxies.size()) + .detail("TLogs", recruits.tLogs.size()) + .detail("Resolvers", recruits.resolvers.size()) + .detail("BackupWorkers", self->backupWorkers.size()) + .trackLatest("MasterRecoveryState"); // Actually, newSeedServers does both the recruiting and initialization of the seed servers; so if this is a brand new database we are sort of lying that we are // past the recruitment 
phase. In a perfect world we would split that up so that the recruitment part happens above (in parallel with recruiting the transaction servers?). wait( newSeedServers( self, recruits, seedServers ) ); state vector> confChanges; - wait(newMasterProxies(self, recruits) && newGrvProxies(self, recruits) && newResolvers(self, recruits) && + wait(newCommitProxies(self, recruits) && newGrvProxies(self, recruits) && newResolvers(self, recruits) && newTLogServers(self, recruits, oldLogSystem, &confChanges)); return confChanges; } @@ -803,7 +808,7 @@ ACTOR Future sendInitialCommitToResolvers( Reference self ) { state int64_t dataOutstanding = 0; state std::vector endpoints; - for(auto& it : self->masterProxies) { + for (auto& it : self->commitProxies) { endpoints.push_back(it.txnState.getEndpoint()); } @@ -1042,8 +1047,7 @@ ACTOR Future getVersion(Reference self, GetCommitVersionReques ACTOR Future provideVersions(Reference self) { state ActorCollection versionActors(false); - for (auto& p : self->masterProxies) - self->lastProxyVersionReplies[p.id()] = ProxyVersionReplies(); + for (auto& p : self->commitProxies) self->lastProxyVersionReplies[p.id()] = ProxyVersionReplies(); loop { choose { @@ -1183,8 +1187,7 @@ ACTOR Future resolutionBalancing(Reference self) { // TraceEvent("KeyResolver").detail("Range", it.range()).detail("Value", it.value()); self->resolverChangesVersion = self->version + 1; - for (auto& p : self->masterProxies) - self->resolverNeedingChanges.insert(p.id()); + for (auto& p : self->commitProxies) self->resolverNeedingChanges.insert(p.id()); self->resolverChanges.set(movedRanges); } catch( Error&e ) { if(e.code() != error_code_operation_failed) @@ -1199,7 +1202,7 @@ static std::set const& normalMasterErrors() { if (s.empty()) { s.insert( error_code_tlog_stopped ); s.insert( error_code_master_tlog_failed ); - s.insert( error_code_master_proxy_failed ); + s.insert(error_code_commit_proxy_failed); s.insert( error_code_grv_proxy_failed ); s.insert( 
error_code_master_resolver_failed ); s.insert( error_code_master_backup_worker_failed ); @@ -1544,8 +1547,8 @@ ACTOR Future masterCore( Reference self ) { recoverAndEndEpoch.cancel(); - ASSERT(self->masterProxies.size() <= self->configuration.getDesiredProxies()); - ASSERT(self->masterProxies.size() >= 1); + ASSERT(self->commitProxies.size() <= self->configuration.getDesiredCommitProxies()); + ASSERT(self->commitProxies.size() >= 1); ASSERT(self->grvProxies.size() <= self->configuration.getDesiredGrvProxies()); ASSERT(self->grvProxies.size() >= 1); ASSERT( self->resolvers.size() <= self->configuration.getDesiredResolvers() ); @@ -1620,10 +1623,10 @@ ACTOR Future masterCore( Reference self ) { tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the initial window of the resolver(s) TraceEvent("MasterRecoveryCommit", self->dbgid); - state Future> recoveryCommit = self->masterProxies[0].commit.tryGetReply(recoveryCommitRequest); + state Future> recoveryCommit = self->commitProxies[0].commit.tryGetReply(recoveryCommitRequest); self->addActor.send( self->logSystem->onError() ); self->addActor.send( waitResolverFailure( self->resolvers ) ); - self->addActor.send( waitProxyFailure( self->masterProxies) ); + self->addActor.send(waitCommitProxyFailure(self->commitProxies)); self->addActor.send( waitGrvProxyFailure( self->grvProxies ) ); self->addActor.send( provideVersions(self) ); self->addActor.send( serveLiveCommittedVersion(self) ); @@ -1758,7 +1761,7 @@ ACTOR Future masterServer( MasterInterface mi, Reference replaceInterface( StorageServer* self, StorageServerInterface loop { state Future infoChanged = self->db->onChange(); - state Reference proxies( new ProxyInfo(self->db->get().client.masterProxies) ); + state Reference proxies(new CommitProxyInfo(self->db->get().client.commitProxies)); choose { - when( GetStorageServerRejoinInfoReply _rep = wait( proxies->size() ? 
basicLoadBalance( proxies, &MasterProxyInterface::getStorageServerRejoinInfo, GetStorageServerRejoinInfoRequest(ssi.id(), ssi.locality.dcId()) ) : Never() ) ) { + when(GetStorageServerRejoinInfoReply _rep = + wait(proxies->size() + ? basicLoadBalance(proxies, &CommitProxyInterface::getStorageServerRejoinInfo, + GetStorageServerRejoinInfoRequest(ssi.id(), ssi.locality.dcId())) + : Never())) { state GetStorageServerRejoinInfoReply rep = _rep; try { tr.reset(); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 36c0a4d089..7ee360d825 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -114,13 +114,13 @@ ACTOR Future> broadcastDBInfoRequest(UpdateServerDBInfoReq } ACTOR static Future extractClientInfo( Reference> db, Reference> info ) { - state std::vector lastProxyUIDs; - state std::vector lastProxies; + state std::vector lastCommitProxyUIDs; + state std::vector lastCommitProxies; state std::vector lastGrvProxyUIDs; state std::vector lastGrvProxies; loop { ClientDBInfo ni = db->get().client; - shrinkProxyList(ni, lastProxyUIDs, lastProxies, lastGrvProxyUIDs, lastGrvProxies); + shrinkProxyList(ni, lastCommitProxyUIDs, lastCommitProxies, lastGrvProxyUIDs, lastGrvProxies); info->set( ni ); wait( db->onChange() ); } @@ -994,7 +994,7 @@ ACTOR Future workerServer( DUMPTOKEN(recruited.clientInterface.profiler); DUMPTOKEN(recruited.tLog); DUMPTOKEN(recruited.master); - DUMPTOKEN(recruited.masterProxy); + DUMPTOKEN(recruited.commitProxy); DUMPTOKEN(recruited.grvProxy); DUMPTOKEN(recruited.resolver); DUMPTOKEN(recruited.storage); @@ -1368,15 +1368,15 @@ ACTOR Future workerServer( } else forwardPromise( req.reply, storageCache.get( req.reqId ) ); } - when( InitializeMasterProxyRequest req = waitNext(interf.masterProxy.getFuture()) ) { - MasterProxyInterface recruited; + when(InitializeCommitProxyRequest req = waitNext(interf.commitProxy.getFuture())) { + CommitProxyInterface recruited; recruited.processId = locality.processId(); 
recruited.provisional = false; recruited.initEndpoints(); std::map details; details["ForMaster"] = req.master.id().shortString(); - startRole( Role::MASTER_PROXY, recruited.id(), interf.id(), details ); + startRole(Role::COMMIT_PROXY, recruited.id(), interf.id(), details); DUMPTOKEN(recruited.commit); DUMPTOKEN(recruited.getConsistentReadVersion); @@ -1385,9 +1385,10 @@ ACTOR Future workerServer( DUMPTOKEN(recruited.waitFailure); DUMPTOKEN(recruited.txnState); - //printf("Recruited as masterProxyServer\n"); - errorForwarders.add( zombie(recruited, forwardError( errors, Role::MASTER_PROXY, recruited.id(), - masterProxyServer( recruited, req, dbInfo, whitelistBinPaths ) ) ) ); + // printf("Recruited as commitProxyServer\n"); + errorForwarders.add( + zombie(recruited, forwardError(errors, Role::COMMIT_PROXY, recruited.id(), + commitProxyServer(recruited, req, dbInfo, whitelistBinPaths)))); req.reply.send(recruited); } when( InitializeGrvProxyRequest req = waitNext(interf.grvProxy.getFuture()) ) { @@ -1857,7 +1858,7 @@ const Role Role::WORKER("Worker", "WK", false); const Role Role::STORAGE_SERVER("StorageServer", "SS"); const Role Role::TRANSACTION_LOG("TLog", "TL"); const Role Role::SHARED_TRANSACTION_LOG("SharedTLog", "SL", false); -const Role Role::MASTER_PROXY("MasterProxyServer", "MP"); +const Role Role::COMMIT_PROXY("CommitProxyServer", "CP"); const Role Role::GRV_PROXY("GrvProxyServer", "GP"); const Role Role::MASTER("MasterServer", "MS"); const Role Role::RESOLVER("Resolver", "RV"); diff --git a/fdbserver/workloads/ConsistencyCheck.actor.cpp b/fdbserver/workloads/ConsistencyCheck.actor.cpp index 1ed5f484b1..f1ab535116 100644 --- a/fdbserver/workloads/ConsistencyCheck.actor.cpp +++ b/fdbserver/workloads/ConsistencyCheck.actor.cpp @@ -365,9 +365,9 @@ struct ConsistencyCheckWorkload : TestWorkload } } - //Get a list of storage servers from the master and compares them with the TLogs. 
- //If this is a quiescent check, then each master proxy needs to respond, otherwise only one needs to respond. - //Returns false if there is a failure (in this case, keyServersPromise will never be set) + // Get a list of storage servers from the master and compares them with the TLogs. + // If this is a quiescent check, then each commit proxy needs to respond, otherwise only one needs to respond. + // Returns false if there is a failure (in this case, keyServersPromise will never be set) ACTOR Future getKeyServers(Database cx, ConsistencyCheckWorkload *self, Promise>>> keyServersPromise) { state std::vector>> keyServers; @@ -380,13 +380,14 @@ struct ConsistencyCheckWorkload : TestWorkload state Span span(deterministicRandom()->randomUniqueID(), "WL:ConsistencyCheck"_loc); while (begin < end) { - state Reference proxyInfo = wait(cx->getMasterProxiesFuture(false)); + state Reference commitProxyInfo = wait(cx->getCommitProxiesFuture(false)); keyServerLocationFutures.clear(); - for (int i = 0; i < proxyInfo->size(); i++) + for (int i = 0; i < commitProxyInfo->size(); i++) keyServerLocationFutures.push_back( - proxyInfo->get(i, &MasterProxyInterface::getKeyServersLocations) + commitProxyInfo->get(i, &CommitProxyInterface::getKeyServersLocations) .getReplyUnlessFailedFor( - GetKeyServerLocationsRequest(span.context, begin, end, limitKeyServers, false, Arena()), 2, 0)); + GetKeyServerLocationsRequest(span.context, begin, end, limitKeyServers, false, Arena()), 2, + 0)); state bool keyServersInsertedForThisIteration = false; choose { @@ -399,8 +400,9 @@ struct ConsistencyCheckWorkload : TestWorkload //If performing quiescent check, then all master proxies should be reachable. 
Otherwise, only one needs to be reachable if (self->performQuiescentChecks && !shards.present()) { - TraceEvent("ConsistencyCheck_MasterProxyUnavailable").detail("MasterProxyID", proxyInfo->getId(i)); - self->testFailure("Master proxy unavailable"); + TraceEvent("ConsistencyCheck_CommitProxyUnavailable") + .detail("CommitProxyID", commitProxyInfo->getId(i)); + self->testFailure("Commit proxy unavailable"); return false; } @@ -1461,11 +1463,20 @@ struct ConsistencyCheckWorkload : TestWorkload return false; } - // Check proxy - ProcessClass::Fitness bestProxyFitness = getBestAvailableFitness(dcToNonExcludedClassTypes[masterDcId], ProcessClass::Proxy); - for (const auto& masterProxy : db.client.masterProxies) { - if (!nonExcludedWorkerProcessMap.count(masterProxy.address()) || nonExcludedWorkerProcessMap[masterProxy.address()].processClass.machineClassFitness(ProcessClass::Proxy) != bestProxyFitness) { - TraceEvent("ConsistencyCheck_ProxyNotBest").detail("BestProxyFitness", bestProxyFitness).detail("ExistingMasterProxyFitness", nonExcludedWorkerProcessMap.count(masterProxy.address()) ? nonExcludedWorkerProcessMap[masterProxy.address()].processClass.machineClassFitness(ProcessClass::Proxy) : -1); + // Check commit proxy + ProcessClass::Fitness bestCommitProxyFitness = + getBestAvailableFitness(dcToNonExcludedClassTypes[masterDcId], ProcessClass::CommitProxy); + for (const auto& commitProxy : db.client.commitProxies) { + if (!nonExcludedWorkerProcessMap.count(commitProxy.address()) || + nonExcludedWorkerProcessMap[commitProxy.address()].processClass.machineClassFitness( + ProcessClass::CommitProxy) != bestCommitProxyFitness) { + TraceEvent("ConsistencyCheck_CommitProxyNotBest") + .detail("BestCommitProxyFitness", bestCommitProxyFitness) + .detail("ExistingCommitProxyFitness", + nonExcludedWorkerProcessMap.count(commitProxy.address()) + ? 
nonExcludedWorkerProcessMap[commitProxy.address()].processClass.machineClassFitness( + ProcessClass::CommitProxy) + : -1); return false; } } diff --git a/fdbserver/workloads/Rollback.actor.cpp b/fdbserver/workloads/Rollback.actor.cpp index aab947efe6..2f4b7549ae 100644 --- a/fdbserver/workloads/Rollback.actor.cpp +++ b/fdbserver/workloads/Rollback.actor.cpp @@ -62,13 +62,13 @@ struct RollbackWorkload : TestWorkload { ACTOR Future simulateFailure( Database cx, RollbackWorkload* self ) { state ServerDBInfo system = self->dbInfo->get(); auto tlogs = system.logSystemConfig.allPresentLogs(); - - if( tlogs.empty() || system.client.masterProxies.empty() ) { + + if (tlogs.empty() || system.client.commitProxies.empty()) { TraceEvent(SevInfo, "UnableToTriggerRollback").detail("Reason", "No tlogs in System Map"); return Void(); } - state MasterProxyInterface proxy = deterministicRandom()->randomChoice( system.client.masterProxies); + state CommitProxyInterface proxy = deterministicRandom()->randomChoice(system.client.commitProxies); int utIndex = deterministicRandom()->randomInt(0, tlogs.size()); state NetworkAddress uncloggedTLog = tlogs[utIndex].address(); @@ -81,8 +81,8 @@ struct RollbackWorkload : TestWorkload { } TraceEvent("AttemptingToTriggerRollback") - .detail("Proxy", proxy.address()) - .detail("UncloggedTLog", uncloggedTLog); + .detail("CommitProxy", proxy.address()) + .detail("UncloggedTLog", uncloggedTLog); for (int t = 0; t < tlogs.size(); t++) { if (t != utIndex) { diff --git a/fdbserver/workloads/TargetedKill.actor.cpp b/fdbserver/workloads/TargetedKill.actor.cpp index 5eba5fd94f..de87ddec1c 100644 --- a/fdbserver/workloads/TargetedKill.actor.cpp +++ b/fdbserver/workloads/TargetedKill.actor.cpp @@ -87,19 +87,17 @@ struct TargetedKillWorkload : TestWorkload { NetworkAddress machine; if( self->machineToKill == "master" ) { machine = self->dbInfo->get().master.address(); - } - else if( self->machineToKill == "masterproxy" ) { - auto proxies = 
cx->getMasterProxies(false); + } else if (self->machineToKill == "commitproxy") { + auto proxies = cx->getCommitProxies(false); int o = deterministicRandom()->randomInt(0, proxies->size()); for( int i = 0; i < proxies->size(); i++) { - MasterProxyInterface mpi = proxies->getInterface(o); + CommitProxyInterface mpi = proxies->getInterface(o); machine = mpi.address(); if(machine != self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress()) break; o = ++o%proxies->size(); } - } - else if( self->machineToKill == "grvproxy" ) { + } else if (self->machineToKill == "grvproxy") { auto grvProxies = cx->getGrvProxies(false); int o = deterministicRandom()->randomInt(0, grvProxies->size()); for( int i = 0; i < grvProxies->size(); i++) { @@ -109,8 +107,7 @@ struct TargetedKillWorkload : TestWorkload { break; o = ++o%grvProxies->size(); } - } - else if( self->machineToKill == "tlog" ) { + } else if (self->machineToKill == "tlog") { auto tlogs = self->dbInfo->get().logSystemConfig.allPresentLogs(); int o = deterministicRandom()->randomInt(0, tlogs.size()); for( int i = 0; i < tlogs.size(); i++) { @@ -120,8 +117,8 @@ struct TargetedKillWorkload : TestWorkload { break; o = ++o%tlogs.size(); } - } - else if( self->machineToKill == "storage" || self->machineToKill == "ss" || self->machineToKill == "storageserver" ) { + } else if (self->machineToKill == "storage" || self->machineToKill == "ss" || + self->machineToKill == "storageserver") { int o = deterministicRandom()->randomInt(0,storageServers.size()); for( int i = 0; i < storageServers.size(); i++) { StorageServerInterface ssi = storageServers[o]; @@ -130,8 +127,7 @@ struct TargetedKillWorkload : TestWorkload { break; o = ++o%storageServers.size(); } - } - else if( self->machineToKill == "clustercontroller" || self->machineToKill == "cc" ) { + } else if (self->machineToKill == "clustercontroller" || self->machineToKill == "cc") { machine = 
self->dbInfo->get().clusterInterface.getWorkers.getEndpoint().getPrimaryAddress(); } diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 040f8d865c..e746d9c18c 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -65,7 +65,7 @@ ERROR( database_locked, 1038, "Database is locked" ) ERROR( cluster_version_changed, 1039, "The protocol version of the cluster has changed" ) ERROR( external_client_already_loaded, 1040, "External client has already been loaded" ) ERROR( lookup_failed, 1041, "DNS lookup failed" ) -ERROR( proxy_memory_limit_exceeded, 1042, "Proxy commit memory limit exceeded" ) +ERROR( proxy_memory_limit_exceeded, 1042, "CommitProxy commit memory limit exceeded" ) ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdown" ) ERROR( serialization_failed, 1044, "Failed to deserialize an object" ) ERROR( connection_unreferenced, 1048, "No peer references for connection" ) @@ -89,12 +89,12 @@ ERROR( master_tlog_failed, 1205, "Master terminating because a TLog failed" ) ERROR( worker_recovery_failed, 1206, "Recovery of a worker process failed" ) ERROR( please_reboot, 1207, "Reboot of server process requested" ) ERROR( please_reboot_delete, 1208, "Reboot of server process requested, with deletion of state" ) -ERROR( master_proxy_failed, 1209, "Master terminating because a Proxy failed" ) +ERROR( commit_proxy_failed, 1209, "Master terminating because a CommitProxy failed" ) ERROR( master_resolver_failed, 1210, "Master terminating because a Resolver failed" ) ERROR( server_overloaded, 1211, "Server is under too much load and cannot respond" ) ERROR( master_backup_worker_failed, 1212, "Master terminating because a backup worker failed") ERROR( tag_throttled, 1213, "Transaction tag is being throttled" ) -ERROR( grv_proxy_failed, 1214, "Master terminating because a GRV Proxy failed" ) +ERROR( grv_proxy_failed, 1214, "Master terminating because a GRV Proxy failed" ) // 15xx Platform errors ERROR(
platform_error, 1500, "Platform error" ) diff --git a/tests/status/invalid_proc_addresses.json b/tests/status/invalid_proc_addresses.json index 5be40ba744..752d3ab41f 100644 --- a/tests/status/invalid_proc_addresses.json +++ b/tests/status/invalid_proc_addresses.json @@ -223,7 +223,7 @@ "roles" : [ { "id" : "f29c4c66f293d1b1", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "44950eb0b3d862c0", @@ -264,7 +264,7 @@ "roles" : [ { "id" : "175f5bed1f306159", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "e583f98ea591c52a", @@ -342,7 +342,7 @@ "roles" : [ { "id" : "c97dc5f2e372921b", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "bbb368082d582712", diff --git a/tests/status/local_6_machine_no_replicas_remain.json b/tests/status/local_6_machine_no_replicas_remain.json index 7460096af4..bfd55b2cb0 100644 --- a/tests/status/local_6_machine_no_replicas_remain.json +++ b/tests/status/local_6_machine_no_replicas_remain.json @@ -172,7 +172,7 @@ }, { "id" : "066a9f0089483a5f", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "d0809246b42910f8", @@ -213,7 +213,7 @@ "roles" : [ { "id" : "3fc3c3d9c9e3349d", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "656697882cc0e76e", @@ -254,7 +254,7 @@ "roles" : [ { "id" : "586d54237f6bf4c7", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "09a94118dc82393a", diff --git a/tests/status/separate_2_of_3_coordinators_remain.json b/tests/status/separate_2_of_3_coordinators_remain.json index 5e4b8ecfd6..6c8f8caade 100644 --- a/tests/status/separate_2_of_3_coordinators_remain.json +++ b/tests/status/separate_2_of_3_coordinators_remain.json @@ -130,7 +130,7 @@ }, { "id" : "9159f5bae811936d", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "4ef3ec0982dab9fe", @@ -171,7 +171,7 @@ "roles" : [ { "id" : "9d158fb102da025f", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "02fe9302ba499227", diff --git a/tests/status/separate_cannot_write_cluster_file.json 
b/tests/status/separate_cannot_write_cluster_file.json index 654651d797..1394ca43cb 100644 --- a/tests/status/separate_cannot_write_cluster_file.json +++ b/tests/status/separate_cannot_write_cluster_file.json @@ -140,7 +140,7 @@ }, { "id" : "00e48601e43045c9", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "0df71fd71bbc14ee", @@ -181,7 +181,7 @@ }, { "id" : "07b3f5362cfec06b", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "bb25c74aca56ccf7", @@ -222,7 +222,7 @@ "roles" : [ { "id" : "361d515d63a595ad", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "4022c037e26868ae", diff --git a/tests/status/separate_idle.json b/tests/status/separate_idle.json index 636703aa0f..9ef918ae0a 100644 --- a/tests/status/separate_idle.json +++ b/tests/status/separate_idle.json @@ -118,7 +118,7 @@ }, { "id" : "4989d9993ee37183", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "009709c84d97df4d", diff --git a/tests/status/separate_initializing.json b/tests/status/separate_initializing.json index aa552f3fe4..a24b155f46 100644 --- a/tests/status/separate_initializing.json +++ b/tests/status/separate_initializing.json @@ -113,7 +113,7 @@ }, { "id" : "4989d9993ee37183", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "009709c84d97df4d", diff --git a/tests/status/separate_no_database.json b/tests/status/separate_no_database.json index 9966754a44..b7009bd89b 100644 --- a/tests/status/separate_no_database.json +++ b/tests/status/separate_no_database.json @@ -154,7 +154,7 @@ }, { "id" : "ae9fe51db979dfd1", - "role" : "proxy" + "role" : "commit_proxy" } ], "version" : "3.0.0-PRERELEASE" diff --git a/tests/status/separate_not_enough_servers.json b/tests/status/separate_not_enough_servers.json index 5e3589544b..9ae3a07d80 100644 --- a/tests/status/separate_not_enough_servers.json +++ b/tests/status/separate_not_enough_servers.json @@ -121,7 +121,7 @@ }, { "id" : "20beaadaa554dee3", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : 
"f0a33233db8e5f67", @@ -143,7 +143,7 @@ "description" : "Recruiting new transaction servers.", "name" : "recruiting_transaction_servers", "required_logs" : 3, - "required_proxies" : 1, + "required_commit_proxies" : 1, "required_grv_proxies" : 1, "required_resolvers" : 1 }, diff --git a/tests/status/single_process_too_many_config_params.json b/tests/status/single_process_too_many_config_params.json index 304f58b59b..875b9e245a 100644 --- a/tests/status/single_process_too_many_config_params.json +++ b/tests/status/single_process_too_many_config_params.json @@ -123,7 +123,7 @@ }, { "id" : "242e27cd68b21c05", - "role" : "proxy" + "role" : "commit_proxy" }, { "id" : "faf07cf91f0ab29d", From cc5bc16bd8ac8841a3378849b02b4cdbc8f1b21b Mon Sep 17 00:00:00 2001 From: Young Liu Date: Tue, 15 Sep 2020 22:29:49 -0700 Subject: [PATCH 2/3] Rename more places from proxy to commit proxy --- fdbcli/fdbcli.actor.cpp | 19 ++++---- fdbclient/CommitProxyInterface.h | 4 +- fdbclient/DatabaseConfiguration.cpp | 48 ++++++++++++------- fdbclient/DatabaseContext.h | 2 +- fdbclient/Knobs.cpp | 2 +- fdbclient/ManagementAPI.actor.cpp | 1 - fdbclient/NativeAPI.actor.cpp | 6 +-- fdbclient/vexillographer/fdb.options | 2 +- fdbrpc/Locality.h | 3 -- fdbserver/ApplyMetadataMutation.cpp | 4 +- fdbserver/ClusterController.actor.cpp | 4 +- fdbserver/Knobs.cpp | 2 +- fdbserver/Knobs.h | 2 +- fdbserver/MasterInterface.h | 2 +- fdbserver/Ratekeeper.actor.cpp | 18 +++---- fdbserver/Resolver.actor.cpp | 10 ++-- fdbserver/Status.actor.cpp | 16 +++---- fdbserver/masterserver.actor.cpp | 26 +++++----- fdbserver/storageserver.actor.cpp | 6 +-- .../workloads/ConfigureDatabase.actor.cpp | 2 +- fdbserver/workloads/TargetedKill.actor.cpp | 10 ++-- tests/status/separate_not_enough_servers.txt | 2 +- 22 files changed, 100 insertions(+), 91 deletions(-) diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index a304daa2ad..ac219df9e3 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -471,8 
+471,7 @@ void initHelp() { helpMap["configure"] = CommandHelp( "configure [new] " "|grv_" - "proxies=|logs=|resolvers=>*", + "COMMIT_PROXIES>|grv_proxies=|logs=|resolvers=>*", "change the database configuration", "The `new' option, if present, initializes a new database with the given configuration rather than changing " "the configuration of an existing one. When used, both a redundancy mode and a storage engine must be " @@ -481,15 +480,13 @@ void initHelp() { "See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage " "engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small " "datasets.\n\ncommit_proxies=: Sets the desired number of commit proxies in the cluster. Must " - "be at least 1, or set " - "to -1 which restores the number of commit proxies to the default value.\n\ngrv_proxies=: Sets " - "the " - "desired number of GRV proxies in the cluster. Must be at least 1, or set to -1 which restores the number of " - "GRV proxies to the default value.\n\nlogs=: Sets the desired number of log servers in the cluster. Must " - "be " - "at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=: " - "Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the " - "number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information."); + "be at least 1, or set to -1 which restores the number of commit proxies to the default " + "value.\n\ngrv_proxies=: Sets the desired number of GRV proxies in the cluster. Must be at least " + "1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=: Sets the " + "desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of " + "logs to the default value.\n\nresolvers=: Sets the desired number of resolvers in the cluster. 
" + "Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the " + "FoundationDB Administration Guide for more information."); helpMap["fileconfigure"] = CommandHelp( "fileconfigure [new] ", "change the database configuration from a file", diff --git a/fdbclient/CommitProxyInterface.h b/fdbclient/CommitProxyInterface.h index c6b12dd7f2..ceba2cf0f8 100644 --- a/fdbclient/CommitProxyInterface.h +++ b/fdbclient/CommitProxyInterface.h @@ -100,10 +100,10 @@ struct CommitProxyInterface { struct ClientDBInfo { constexpr static FileIdentifier file_identifier = 5355080; UID id; // Changes each time anything else changes - vector< GrvProxyInterface > grvProxies; + vector grvProxies; vector commitProxies; Optional - firstCommitProxy; // not serialized, used for commitOnFirstProxy when the proxies vector has been shrunk + firstCommitProxy; // not serialized, used for commitOnFirstProxy when the commit proxies vector has been shrunk double clientTxnInfoSampleRate; int64_t clientTxnInfoSizeLimit; Optional forward; diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index f70fc4275c..464c220555 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -164,24 +164,40 @@ void DatabaseConfiguration::setDefaultReplicationPolicy() { } bool DatabaseConfiguration::isValid() const { - if (!(initialized && tLogWriteAntiQuorum >= 0 && tLogWriteAntiQuorum <= tLogReplicationFactor / 2 && - tLogReplicationFactor >= 1 && storageTeamSize >= 1 && getDesiredCommitProxies() >= 1 && - getDesiredGrvProxies() >= 1 && getDesiredLogs() >= 1 && getDesiredResolvers() >= 1 && - tLogVersion != TLogVersion::UNSET && tLogVersion >= TLogVersion::MIN_RECRUITABLE && - tLogVersion <= TLogVersion::MAX_SUPPORTED && tLogDataStoreType != KeyValueStoreType::END && - tLogSpillType != TLogSpillType::UNSET && - !(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) && - 
storageServerStoreType != KeyValueStoreType::END && autoCommitProxyCount >= 1 && autoGrvProxyCount >= 1 && - autoResolverCount >= 1 && autoDesiredTLogCount >= 1 && storagePolicy && tLogPolicy && - getDesiredRemoteLogs() >= 1 && remoteTLogReplicationFactor >= 0 && repopulateRegionAntiQuorum >= 0 && - repopulateRegionAntiQuorum <= 1 && usableRegions >= 1 && usableRegions <= 2 && regions.size() <= 2 && - (usableRegions == 1 || regions.size() == 2) && (regions.size() == 0 || regions[0].priority >= 0) && - (regions.size() == 0 || - tLogPolicy->info() != - "dcid^2 x zoneid^2 x 1"))) { // We cannot specify regions with three_datacenter replication + if( !(initialized && + tLogWriteAntiQuorum >= 0 && + tLogWriteAntiQuorum <= tLogReplicationFactor/2 && + tLogReplicationFactor >= 1 && + storageTeamSize >= 1 && + getDesiredCommitProxies() >= 1 && + getDesiredGrvProxies() >= 1 && + getDesiredLogs() >= 1 && + getDesiredResolvers() >= 1 && + tLogVersion != TLogVersion::UNSET && + tLogVersion >= TLogVersion::MIN_RECRUITABLE && + tLogVersion <= TLogVersion::MAX_SUPPORTED && + tLogDataStoreType != KeyValueStoreType::END && + tLogSpillType != TLogSpillType::UNSET && + !(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) && + storageServerStoreType != KeyValueStoreType::END && + autoCommitProxyCount >= 1 && + autoGrvProxyCount >= 1 && + autoResolverCount >= 1 && + autoDesiredTLogCount >= 1 && + storagePolicy && + tLogPolicy && + getDesiredRemoteLogs() >= 1 && + remoteTLogReplicationFactor >= 0 && + repopulateRegionAntiQuorum >= 0 && + repopulateRegionAntiQuorum <= 1 && + usableRegions >= 1 && + usableRegions <= 2 && + regions.size() <= 2 && + ( usableRegions == 1 || regions.size() == 2 ) && + ( regions.size() == 0 || regions[0].priority >= 0 ) && + ( regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") ) ) { //We cannot specify regions with three_datacenter replication return false; } - std::set dcIds; dcIds.insert(Key()); for(auto& r : 
regions) { diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index f9367482e5..5652ae7a14 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -221,7 +221,7 @@ public: Future monitorProxiesInfoChange; Reference commitProxies; Reference grvProxies; - bool proxyProvisional; + bool proxyProvisional; // Provisional commit proxy and grv proxy are used at the same time. UID proxiesLastChange; LocalityData clientLocality; QueueModel queueModel; diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index d1ec7a4f5f..9ec850bc75 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -171,7 +171,7 @@ void ClientKnobs::initialize(bool randomize) { init( MIN_CLEANUP_SECONDS, 3600.0 ); // Configuration - init( DEFAULT_AUTO_COMMIT_PROXIES, 3 ); + init( DEFAULT_AUTO_COMMIT_PROXIES, 3 ); init( DEFAULT_AUTO_GRV_PROXIES, 1 ); init( DEFAULT_AUTO_RESOLVERS, 1 ); init( DEFAULT_AUTO_LOGS, 3 ); diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index e4a5183b95..bd4b3b6e95 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -747,7 +747,6 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { proxyCount = result.old_commit_proxies; } - // Need to configure a good number. 
result.desired_grv_proxies = std::max(std::min(4, processCount / 20), 1); int grvProxyCount; if (!statusObjConfig.get("grv_proxies", result.old_grv_proxies)) { diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 19683a536b..62a0c1936d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1597,9 +1597,9 @@ Reference DatabaseContext::getGrvProxies(bool useProvisionalProxie // Actor which will wait until the MultiInterface returned by the DatabaseContext cx is not NULL ACTOR Future> getCommitProxiesFuture(DatabaseContext* cx, bool useProvisionalProxies) { loop{ - Reference proxies = cx->getCommitProxies(useProvisionalProxies); - if (proxies) - return proxies; + Reference commitProxies = cx->getCommitProxies(useProvisionalProxies); + if (commitProxies) + return commitProxies; wait( cx->onProxiesChanged() ); } } diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index f11956d79c..37e57346ee 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -195,7 +195,7 @@ description is not currently required but encouraged.