2017-05-26 04:48:44 +08:00
|
|
|
/*
|
|
|
|
* fdbserver.actor.cpp
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
2022-03-22 04:36:23 +08:00
|
|
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2019-02-06 08:46:08 +08:00
|
|
|
// There's something in one of the files below that defines
|
|
|
|
// a macro that makes boost interprocess break on Windows.
|
|
|
|
#define BOOST_DATE_TIME_NO_LIB
|
2020-09-22 11:19:15 +08:00
|
|
|
|
|
|
|
#include <algorithm>
|
2020-07-08 00:06:13 +08:00
|
|
|
#include <cctype>
|
2020-09-22 11:19:15 +08:00
|
|
|
#include <fstream>
|
2020-07-08 00:06:13 +08:00
|
|
|
#include <iterator>
|
2020-09-22 11:19:15 +08:00
|
|
|
#include <sstream>
|
|
|
|
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <time.h>
|
|
|
|
|
2019-06-19 09:15:15 +08:00
|
|
|
#include <boost/algorithm/string.hpp>
|
2020-09-22 11:19:15 +08:00
|
|
|
#include <boost/interprocess/managed_shared_memory.hpp>
|
2019-02-06 08:46:08 +08:00
|
|
|
|
2022-02-23 17:23:27 +08:00
|
|
|
#include <fmt/printf.h>
|
2022-02-18 23:48:44 +08:00
|
|
|
|
2021-07-28 01:07:18 +08:00
|
|
|
#include "fdbclient/ActorLineageProfiler.h"
|
2021-10-11 11:44:56 +08:00
|
|
|
#include "fdbclient/ClusterConnectionFile.h"
|
2021-06-03 14:40:52 +08:00
|
|
|
#include "fdbclient/IKnobCollection.h"
|
2019-02-18 07:41:16 +08:00
|
|
|
#include "fdbclient/NativeAPI.actor.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbclient/SystemData.h"
|
|
|
|
#include "fdbclient/versions.h"
|
2020-09-11 02:06:56 +08:00
|
|
|
#include "fdbclient/BuildFlags.h"
|
2022-06-24 07:03:53 +08:00
|
|
|
#include "fdbrpc/WellKnownEndpoints.h"
|
2023-01-01 07:22:28 +08:00
|
|
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
2022-02-04 07:33:58 +08:00
|
|
|
#include "fdbclient/SimpleIni.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbrpc/AsyncFileCached.actor.h"
|
2022-02-18 23:48:44 +08:00
|
|
|
#include "fdbrpc/IPAllowList.h"
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#include "fdbrpc/FlowProcess.actor.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbrpc/Net2FileSystem.h"
|
|
|
|
#include "fdbrpc/PerfMetric.h"
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#include "fdbrpc/fdbrpc.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbrpc/simulator.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/ConflictSet.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbserver/CoordinationInterface.h"
|
|
|
|
#include "fdbserver/CoroFlow.h"
|
2019-03-06 02:29:37 +08:00
|
|
|
#include "fdbserver/DataDistribution.actor.h"
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#include "fdbserver/FDBExecHelper.actor.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/IKeyValueStore.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbserver/MoveKeys.actor.h"
|
|
|
|
#include "fdbserver/NetworkTest.h"
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#include "fdbserver/RemoteIKeyValueStore.actor.h"
|
2021-05-31 02:51:47 +08:00
|
|
|
#include "fdbserver/RestoreWorkerInterface.actor.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbserver/ServerDBInfo.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/SimulatedCluster.h"
|
2022-10-05 07:01:02 +08:00
|
|
|
#include "fdbserver/Status.actor.h"
|
2019-02-18 11:25:16 +08:00
|
|
|
#include "fdbserver/TesterInterface.actor.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "fdbserver/WorkerInterface.actor.h"
|
|
|
|
#include "fdbserver/pubsub.h"
|
2022-07-26 13:12:28 +08:00
|
|
|
#include "fdbserver/OnDemandStore.h"
|
2019-02-18 11:18:30 +08:00
|
|
|
#include "fdbserver/workloads/workloads.actor.h"
|
2021-12-15 04:01:44 +08:00
|
|
|
#include "flow/ArgParseUtil.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "flow/DeterministicRandom.h"
|
|
|
|
#include "flow/Platform.h"
|
2020-10-24 02:50:17 +08:00
|
|
|
#include "flow/ProtocolVersion.h"
|
2022-06-28 07:05:55 +08:00
|
|
|
#include "SimpleOpt/SimpleOpt.h"
|
2020-09-22 11:19:15 +08:00
|
|
|
#include "flow/SystemMonitor.h"
|
2020-03-05 12:14:47 +08:00
|
|
|
#include "flow/TLSConfig.actor.h"
|
2022-06-24 08:05:36 +08:00
|
|
|
#include "fdbclient/Tracing.h"
|
2021-06-04 06:10:04 +08:00
|
|
|
#include "flow/WriteOnlySet.h"
|
2021-04-05 12:36:05 +08:00
|
|
|
#include "flow/UnitTest.h"
|
2021-07-24 07:28:20 +08:00
|
|
|
#include "flow/FaultInjection.h"
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#include "flow/flow.h"
|
|
|
|
#include "flow/network.h"
|
2019-03-11 22:31:44 +08:00
|
|
|
|
2020-02-02 02:00:06 +08:00
|
|
|
#if defined(__linux__) || defined(__FreeBSD__)
|
2017-05-26 04:48:44 +08:00
|
|
|
#include <execinfo.h>
|
|
|
|
#include <signal.h>
|
2022-09-04 04:21:01 +08:00
|
|
|
#if defined(__linux__)
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#include <sys/prctl.h>
|
2022-09-04 04:21:01 +08:00
|
|
|
#elif defined(__FreeBSD__)
|
|
|
|
#include <sys/procctl.h>
|
|
|
|
#endif
|
2017-05-26 04:48:44 +08:00
|
|
|
#ifdef ALLOC_INSTRUMENTATION
|
|
|
|
#include <cxxabi.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
2019-02-07 11:27:38 +08:00
|
|
|
#ifdef WIN32
|
|
|
|
#define NOMINMAX
|
|
|
|
#define WIN32_LEAN_AND_MEAN
|
|
|
|
#include <Windows.h>
|
2017-05-26 04:48:44 +08:00
|
|
|
#endif
|
|
|
|
|
2018-08-11 06:18:24 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-06-04 06:10:04 +08:00
|
|
|
using namespace std::literals;
|
|
|
|
|
2020-01-29 09:53:41 +08:00
|
|
|
// clang-format off
|
2017-05-26 04:48:44 +08:00
|
|
|
// Identifiers for the command-line options understood by this program
// (fdbserver). Each OPT_* value is used as the first field of one or more
// entries in the g_rgOptions table below, so several spellings of a flag
// (e.g. "-C" and "--cluster-file") can map to the same identifier.
// Values are assigned implicitly in declaration order, starting at 0.
enum {
|
2020-07-08 00:06:13 +08:00
|
|
|
OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_TRACER, OPT_NEWCONSOLE,
|
2022-04-07 11:06:24 +08:00
|
|
|
OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_VMEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_CACHEMEMLIMIT, OPT_MACHINEID,
|
2020-09-11 02:06:56 +08:00
|
|
|
OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_BUILD_FLAGS, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR,
|
2022-07-20 04:15:51 +08:00
|
|
|
OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_PRINT_CODE_PROBES, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_UNITTESTPARAM, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE,
|
2020-01-29 09:53:41 +08:00
|
|
|
OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE,
|
2022-07-28 05:02:01 +08:00
|
|
|
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB, OPT_NO_CONFIG_DB, OPT_FAULT_INJECTION, OPT_PROFILER, OPT_PRINT_SIMTIME,
|
2023-03-07 08:06:03 +08:00
|
|
|
// NOTE(review): "CONECTION" in OPT_KMS_REST_ALLOW_NOT_SECURE_CONECTION is misspelled; renaming it would require updating every use of the identifier.
OPT_FLOW_PROCESS_NAME, OPT_FLOW_PROCESS_ENDPOINT, OPT_IP_TRUSTED_MASK, OPT_KMS_CONN_DISCOVERY_URL_FILE, OPT_KMS_CONNECTOR_TYPE, OPT_KMS_REST_ALLOW_NOT_SECURE_CONECTION, OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS,
|
2023-01-10 02:55:53 +08:00
|
|
|
OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, OPT_KMS_CONN_GET_LATEST_ENCRYPTION_KEYS_ENDPOINT, OPT_KMS_CONN_GET_BLOB_METADATA_ENDPOINT, OPT_NEW_CLUSTER_KEY, OPT_AUTHZ_PUBLIC_KEY_FILE, OPT_USE_FUTURE_PROTOCOL_VERSION
|
2019-07-24 06:05:21 +08:00
|
|
|
};
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
// Command-line option table handed to the CSimpleOpt parser.
// Each entry is { option id, flag text as typed on the command line, argument kind }.
// Several ids appear twice because a short and a long spelling map to the same option.
// NOTE(review): SO_REQ_SEP presumably means "takes a value as a separate argument" and SO_NONE "takes no
// value"; flags ending in '-' (e.g. "--knob-") look like prefix matches -- confirm against SimpleOpt.h.
CSimpleOpt::SOption g_rgOptions[] = {
	// Cluster connection, role and addresses
	{ OPT_CONNFILE, "-C", SO_REQ_SEP },
	{ OPT_CONNFILE, "--cluster-file", SO_REQ_SEP },
	{ OPT_SEEDCONNFILE, "--seed-cluster-file", SO_REQ_SEP },
	{ OPT_SEEDCONNSTRING, "--seed-connection-string", SO_REQ_SEP },
	{ OPT_ROLE, "-r", SO_REQ_SEP },
	{ OPT_ROLE, "--role", SO_REQ_SEP },
	{ OPT_PUBLICADDR, "-p", SO_REQ_SEP },
	{ OPT_PUBLICADDR, "--public-address", SO_REQ_SEP },
	{ OPT_LISTEN, "-l", SO_REQ_SEP },
	{ OPT_LISTEN, "--listen-address", SO_REQ_SEP },
#ifdef __linux__
	// Only compiled in on Linux
	{ OPT_FILESYSTEM, "--data-filesystem", SO_REQ_SEP },
	{ OPT_PROFILER_RSS_SIZE, "--rsssize", SO_REQ_SEP },
#endif
	// Data and log locations, log rolling
	{ OPT_DATAFOLDER, "-d", SO_REQ_SEP },
	{ OPT_DATAFOLDER, "--datadir", SO_REQ_SEP },
	{ OPT_LOGFOLDER, "-L", SO_REQ_SEP },
	{ OPT_LOGFOLDER, "--logdir", SO_REQ_SEP },
	{ OPT_ROLLSIZE, "-Rs", SO_REQ_SEP },
	{ OPT_ROLLSIZE, "--logsize", SO_REQ_SEP },
	{ OPT_MAXLOGS, "--maxlogs", SO_REQ_SEP },
	{ OPT_MAXLOGSSIZE, "--maxlogssize", SO_REQ_SEP },
	{ OPT_LOGGROUP, "--loggroup", SO_REQ_SEP },
	{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
	{ OPT_TRACER, "--tracer", SO_REQ_SEP },
#ifdef _WIN32
	// Only compiled in on Windows
	{ OPT_NEWCONSOLE, "-n", SO_NONE },
	{ OPT_NEWCONSOLE, "--newconsole", SO_NONE },
	{ OPT_NOBOX, "-q", SO_NONE },
	{ OPT_NOBOX, "--no-dialog", SO_NONE },
#endif
	// Test and simulation inputs
	{ OPT_KVFILE, "--kvfile", SO_REQ_SEP },
	{ OPT_TESTFILE, "-f", SO_REQ_SEP },
	{ OPT_TESTFILE, "--testfile", SO_REQ_SEP },
	{ OPT_RESTARTING, "-R", SO_NONE },
	{ OPT_RESTARTING, "--restarting", SO_NONE },
	{ OPT_RANDOMSEED, "-s", SO_REQ_SEP },
	{ OPT_RANDOMSEED, "--seed", SO_REQ_SEP },
	{ OPT_KEY, "-k", SO_REQ_SEP },
	{ OPT_KEY, "--key", SO_REQ_SEP },
	// Memory limits
	{ OPT_MEMLIMIT, "-m", SO_REQ_SEP },
	{ OPT_MEMLIMIT, "--memory", SO_REQ_SEP },
	{ OPT_VMEMLIMIT, "--memory-vsize", SO_REQ_SEP },
	{ OPT_STORAGEMEMLIMIT, "-M", SO_REQ_SEP },
	{ OPT_STORAGEMEMLIMIT, "--storage-memory", SO_REQ_SEP },
	{ OPT_CACHEMEMLIMIT, "--cache-memory", SO_REQ_SEP },
	// Machine identity and class
	{ OPT_MACHINEID, "-i", SO_REQ_SEP },
	{ OPT_MACHINEID, "--machine-id", SO_REQ_SEP },
	{ OPT_DCID, "-a", SO_REQ_SEP },
	{ OPT_DCID, "--datacenter-id", SO_REQ_SEP },
	{ OPT_MACHINE_CLASS, "-c", SO_REQ_SEP },
	{ OPT_MACHINE_CLASS, "--class", SO_REQ_SEP },
	// Debugging, version and help output
	{ OPT_BUGGIFY, "-b", SO_REQ_SEP },
	{ OPT_BUGGIFY, "--buggify", SO_REQ_SEP },
	{ OPT_VERSION, "-v", SO_NONE },
	{ OPT_VERSION, "--version", SO_NONE },
	{ OPT_BUILD_FLAGS, "--build-flags", SO_NONE },
	{ OPT_CRASHONERROR, "--crash", SO_NONE },
	{ OPT_NETWORKIMPL, "-N", SO_REQ_SEP },
	{ OPT_NETWORKIMPL, "--network", SO_REQ_SEP },
	{ OPT_NOBUFSTDOUT, "--unbufferedout", SO_NONE },
	{ OPT_BUFSTDOUTERR, "--bufferedout", SO_NONE },
	{ OPT_TRACECLOCK, "--traceclock", SO_REQ_SEP },
	{ OPT_NUMTESTERS, "--num-testers", SO_REQ_SEP },
	{ OPT_HELP, "-?", SO_NONE },
	{ OPT_HELP, "-h", SO_NONE },
	{ OPT_HELP, "--help", SO_NONE },
	{ OPT_DEVHELP, "--dev-help", SO_NONE },
	{ OPT_PRINT_CODE_PROBES, "--code-probes", SO_REQ_SEP },
	// Flags whose text ends in '-' carry a caller-chosen suffix (e.g. --knob-NAME)
	{ OPT_KNOB, "--knob-", SO_REQ_SEP },
	{ OPT_UNITTESTPARAM, "--test-", SO_REQ_SEP },
	{ OPT_LOCALITY, "--locality-", SO_REQ_SEP },
	{ OPT_TESTSERVERS, "--testservers", SO_REQ_SEP },
	{ OPT_TEST_ON_SERVERS, "--testonservers", SO_NONE },
	{ OPT_METRICSCONNFILE, "--metrics-cluster", SO_REQ_SEP },
	{ OPT_METRICSPREFIX, "--metrics-prefix", SO_REQ_SEP },
	{ OPT_IO_TRUST_SECONDS, "--io-trust-seconds", SO_REQ_SEP },
	{ OPT_IO_TRUST_WARN_ONLY, "--io-trust-warn-only", SO_NONE },
	{ OPT_TRACE_FORMAT, "--trace-format", SO_REQ_SEP },
	{ OPT_WHITELIST_BINPATH, "--whitelist-binpath", SO_REQ_SEP },
	{ OPT_BLOB_CREDENTIAL_FILE, "--blob-credential-file", SO_REQ_SEP },
	{ OPT_CONFIG_PATH, "--config-path", SO_REQ_SEP },
	{ OPT_USE_TEST_CONFIG_DB, "--use-test-config-db", SO_NONE },
	{ OPT_NO_CONFIG_DB, "--no-config-db", SO_NONE },
	{ OPT_FAULT_INJECTION, "-fi", SO_REQ_SEP },
	{ OPT_FAULT_INJECTION, "--fault-injection", SO_REQ_SEP },
	{ OPT_PROFILER, "--profiler-", SO_REQ_SEP },
	{ OPT_PRINT_SIMTIME, "--print-sim-time", SO_NONE },
	{ OPT_FLOW_PROCESS_NAME, "--process-name", SO_REQ_SEP },
	{ OPT_FLOW_PROCESS_ENDPOINT, "--process-endpoint", SO_REQ_SEP },
	{ OPT_IP_TRUSTED_MASK, "--trusted-subnet-", SO_REQ_SEP },
	{ OPT_NEW_CLUSTER_KEY, "--new-cluster-key", SO_REQ_SEP },
	{ OPT_AUTHZ_PUBLIC_KEY_FILE, "--authorization-public-key-file", SO_REQ_SEP },
	// KMS connector configuration
	{ OPT_KMS_CONN_DISCOVERY_URL_FILE, "--discover-kms-conn-url-file", SO_REQ_SEP },
	{ OPT_KMS_CONNECTOR_TYPE, "--kms-connector-type", SO_REQ_SEP },
	{ OPT_KMS_REST_ALLOW_NOT_SECURE_CONECTION, "--kms-rest-allow-not-secure-connection", SO_NONE },
	{ OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, "--kms-conn-validation-token-details", SO_REQ_SEP },
	{ OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, "--kms-conn-get-encryption-keys-endpoint", SO_REQ_SEP },
	{ OPT_KMS_CONN_GET_LATEST_ENCRYPTION_KEYS_ENDPOINT, "--kms-conn-get-latest-encryption-keys-endpoint", SO_REQ_SEP },
	{ OPT_KMS_CONN_GET_BLOB_METADATA_ENDPOINT, "--kms-conn-get-blob-metadata-endpoint", SO_REQ_SEP },
	{ OPT_USE_FUTURE_PROTOCOL_VERSION, "--use-future-protocol-version", SO_REQ_SEP },
	// TLS flags are supplied by a macro defined outside this table
	TLS_OPTION_FLAGS,
	// Terminator entry; must stay last
	SO_END_OF_OPTIONS
};
|
|
|
|
|
2020-01-29 09:53:41 +08:00
|
|
|
// clang-format on
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
// Entry points declared here without a body; presumably defined in other translation units.
extern void dsltest();
extern void pingtest();
extern void copyTest();
extern void versionedMapTest();
extern void createTemplateDatabase();

// Returns the source version string printed by printVersion().
extern const char* getSourceVersion();

extern void flushTraceFileVoid();

extern const int MAX_CLUSTER_FILE_BYTES;

#ifdef ALLOC_INSTRUMENTATION
// Only declared when allocation instrumentation is compiled in.
extern uint8_t* g_extra_memory;
#endif

// Global switch read by the failAfter() actor: when false, a fired trigger does not kill the process.
bool enableFailures = true;
|
|
|
|
|
|
|
|
// Checks condition `x` inside a function returning bool: when `x` is false, prints
// "Test failed: <stringified x>" to stdout and returns false from the enclosing function.
// NOTE(review): expands to a bare `if` statement (no do/while(0) wrapper), so using it directly before an
// `else` would capture that `else` -- confirm call sites before changing the shape.
#define test_assert(x)                                                                                                 \
	if (!(x)) {                                                                                                        \
		std::cout << "Test failed: " #x << std::endl;                                                                  \
		return false;                                                                                                  \
	}
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
#include <sddl.h>
|
|
|
|
|
|
|
|
// It is your
|
|
|
|
// responsibility to properly initialize the
|
|
|
|
// structure and to free the structure's
|
|
|
|
// lpSecurityDescriptor member when you have
|
|
|
|
// finished using it. To free the structure's
|
|
|
|
// lpSecurityDescriptor member, call the
|
|
|
|
// LocalFree function.
|
|
|
|
BOOL CreatePermissiveReadWriteDACL(SECURITY_ATTRIBUTES* pSA) {
|
2020-08-19 05:18:50 +08:00
|
|
|
UNSTOPPABLE_ASSERT(pSA != nullptr);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
TCHAR* szSD = TEXT("D:") // Discretionary ACL
|
|
|
|
TEXT("(A;OICI;GR;;;AU)") // Allow read/write/execute to authenticated users
|
|
|
|
TEXT("(A;OICI;GA;;;BA)"); // Allow full control to administrators
|
|
|
|
|
|
|
|
return ConvertStringSecurityDescriptorToSecurityDescriptor(
|
|
|
|
szSD, SDDL_REVISION_1, &(pSA->lpSecurityDescriptor), nullptr);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
class WorldReadablePermissions {
|
|
|
|
public:
|
|
|
|
WorldReadablePermissions() {
|
|
|
|
#ifdef _WIN32
|
|
|
|
sa.nLength = sizeof(SECURITY_ATTRIBUTES);
|
|
|
|
sa.bInheritHandle = FALSE;
|
|
|
|
if (!CreatePermissiveReadWriteDACL(&sa)) {
|
|
|
|
TraceEvent("Win32DACLCreationFail").GetLastError();
|
|
|
|
throw platform_error();
|
|
|
|
}
|
|
|
|
permission.set_permissions(&sa);
|
2020-02-02 02:00:06 +08:00
|
|
|
#elif (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__))
|
2017-05-26 04:48:44 +08:00
|
|
|
// There is nothing to do here, since the default permissions are fine
|
|
|
|
#else
|
|
|
|
#error Port me!
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual ~WorldReadablePermissions() {
|
|
|
|
#ifdef _WIN32
|
|
|
|
LocalFree(sa.lpSecurityDescriptor);
|
2020-02-02 02:00:06 +08:00
|
|
|
#elif (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__))
|
2017-05-26 04:48:44 +08:00
|
|
|
// There is nothing to do here, since the default permissions are fine
|
|
|
|
#else
|
|
|
|
#error Port me!
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
boost::interprocess::permissions permission;
|
|
|
|
|
|
|
|
private:
|
|
|
|
WorldReadablePermissions(const WorldReadablePermissions& rhs) {}
|
|
|
|
#ifdef _WIN32
|
|
|
|
SECURITY_ATTRIBUTES sa;
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
// Returns a machine-wide UID shared through a named shared memory segment.
//
// A fresh random UID is always drawn first. When DEBUG_DETERMINISM is set that UID is returned directly.
// Otherwise the segment "fdbserver_shared_memory_id" is opened or created and the UID stored under
// "machineId" is returned (the fresh UID is stored if none exists yet). If open-or-create throws, the
// segment is opened read-only instead; after ten failed read-only attempts a critical error is raised.
UID getSharedMemoryMachineId() {
	// Candidate UID, used only if the segment does not already hold one
	UID candidateId = deterministicRandom()->randomUniqueID();

#if DEBUG_DETERMINISM
	// Shared memory is bypassed entirely in this build mode
	return candidateId;
#else
	UID* storedId = nullptr;
	int failedAttempts = 0;

	// Permissions object defaults to 0644 on *nix, but on windows defaults to allowing access to only the
	// creator, hence the dedicated wrapper that sets up a DACL there.
	WorldReadablePermissions worldReadable;
	const char* segmentName = "fdbserver_shared_memory_id";

	loop {
		try {
			// "0" is the default netPrefix "addr"
			boost::interprocess::managed_shared_memory writableSegment(
			    boost::interprocess::open_or_create, segmentName, 1000, 0, worldReadable.permission);
			storedId = writableSegment.find_or_construct<UID>("machineId")(candidateId);
			if (storedId == nullptr) {
				criticalError(
				    FDB_EXIT_ERROR, "SharedMemoryError", "Could not locate or create shared memory - 'machineId'");
			}
			return *storedId;
		} catch (boost::interprocess::interprocess_exception&) {
			try {
				// The segment may exist but belong to another user: fall back to a read-only mapping
				boost::interprocess::managed_shared_memory readOnlySegment(boost::interprocess::open_read_only,
				                                                           segmentName);
				storedId = readOnlySegment.find<UID>("machineId").first;
				if (storedId == nullptr) {
					criticalError(FDB_EXIT_ERROR, "SharedMemoryError", "Could not locate shared memory - 'machineId'");
				}
				return *storedId;
			} catch (boost::interprocess::interprocess_exception& ex) {
				// The segment can vanish between the two open calls, so go around again; the attempt cap
				// keeps an unrelated persistent failure from looping forever.
				++failedAttempts;
				if (failedAttempts == 10) {
					criticalError(FDB_EXIT_ERROR,
					              "SharedMemoryError",
					              format("Could not open shared memory - %s", ex.what()).c_str());
				}
			}
		}
	}
#endif
}
|
|
|
|
|
2022-09-15 08:10:49 +08:00
|
|
|
// Waits for `trigger` to fire and then, if the global enableFailures flag is still set, prints a message
// and kills simulated process `m` with KillType::KillInstantly.
// `m` defaults to the simulator's current process at the time of the call.
ACTOR void failAfter(Future<Void> trigger, ISimulator::ProcessInfo* m = g_simulator->getCurrentProcess()) {
	wait(trigger);
	// enableFailures is checked after the wait, so clearing it before the trigger fires cancels the kill
	if (enableFailures) {
		printf("Killing machine: %s at %f\n", m->address.toString().c_str(), now());
		g_simulator->killProcess(m, ISimulator::KillType::KillInstantly);
	}
}
|
|
|
|
|
|
|
|
// Endpoint overload: when the active network is the simulator, resolves the process that owns endpoint `e`
// and schedules it to be killed once `trigger` fires. Does nothing when not running under the simulator.
void failAfter(Future<Void> trigger, Endpoint e) {
	if (g_network == g_simulator) {
		ISimulator::ProcessInfo* target = g_simulator->getProcess(e);
		failAfter(trigger, target);
	}
}
|
|
|
|
|
2020-11-13 09:03:41 +08:00
|
|
|
// Never-ending actor: every SERVER_KNOBS->HISTOGRAM_REPORT_INTERVAL seconds, asks the histogram registry
// to log its report.
ACTOR Future<Void> histogramReport() {
	loop {
		wait(delay(SERVER_KNOBS->HISTOGRAM_REPORT_INTERVAL));

		GetHistogramRegistry().logReport();
	}
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
// Micro-benchmark for CommitTransactionRef serialization.
// Runs 10000 iterations; each builds a batch of 1000 transactions, serializes it with BinaryWriter, copies
// the bytes into a new arena, deserializes them with ArenaReader, and times each phase (including the
// destruction of the per-iteration objects). Prints throughput per phase to stdout.
void testSerializationSpeed() {
	double tstart;
	double build = 0, serialize = 0, deserialize = 0, copy = 0, deallocate = 0;
	double bytes = 0;
	double testBegin = timer();
	for (int a = 0; a < 10000; a++) {
		// Inner scope: everything declared here is destroyed at the closing brace, which is what the
		// "deallocate" measurement below times.
		{
			tstart = timer();

			Arena batchArena;
			VectorRef<CommitTransactionRef> batch;
			batch.resize(batchArena, 1000);
			for (int t = 0; t < batch.size(); t++) {
				CommitTransactionRef& tr = batch[t];
				tr.read_snapshot = 0;
				// Two SetValue mutations followed by one ClearRange mutation per transaction
				for (int i = 0; i < 2; i++)
					tr.mutations.push_back_deep(batchArena,
					                            MutationRef(MutationRef::SetValue, "KeyABCDE"_sr, "SomeValu"_sr));
				tr.mutations.push_back_deep(batchArena,
				                            MutationRef(MutationRef::ClearRange, "BeginKey"_sr, "EndKeyAB"_sr));
			}

			build += timer() - tstart;

			tstart = timer();

			BinaryWriter wr(IncludeVersion());
			wr << batch;

			bytes += wr.getLength();

			serialize += timer() - tstart;

			// Single-pass loop; the bound is the number of copy/deserialize repetitions per batch
			for (int i = 0; i < 1; i++) {
				tstart = timer();
				Arena arena;
				StringRef data(arena, StringRef((const uint8_t*)wr.getData(), wr.getLength()));
				copy += timer() - tstart;

				tstart = timer();
				ArenaReader rd(arena, data, IncludeVersion());
				VectorRef<CommitTransactionRef> batch2;
				rd >> arena >> batch2;

				deserialize += timer() - tstart;
			}

			// Restart the clock just before the scope ends so only destruction is measured
			tstart = timer();
		}
		deallocate += timer() - tstart;
	}
	double elapsed = (timer() - testBegin);
	// NOTE(review): the per-second figure divides a fixed 1000000 by elapsed, while the loops above process
	// 10000 * 1000 transactions -- confirm which count is intended.
	printf("Test speed: %0.1f MB/sec (%0.0f/sec)\n", bytes / 1e6 / elapsed, 1000000 / elapsed);
	printf(" Build: %0.1f MB/sec\n", bytes / 1e6 / build);
	printf(" Serialize: %0.1f MB/sec\n", bytes / 1e6 / serialize);
	printf(" Copy: %0.1f MB/sec\n", bytes / 1e6 / copy);
	printf(" Deserialize: %0.1f MB/sec\n", bytes / 1e6 / deserialize);
	printf(" Deallocate: %0.1f MB/sec\n", bytes / 1e6 / deallocate);
	printf(" Bytes: %0.1f MB\n", bytes / 1e6);
	printf("\n");
}
|
|
|
|
|
|
|
|
std::string toHTML(const StringRef& binaryString) {
|
|
|
|
std::string s;
|
|
|
|
|
|
|
|
for (int i = 0; i < binaryString.size(); i++) {
|
|
|
|
uint8_t c = binaryString[i];
|
|
|
|
if (c == '<')
|
|
|
|
s += "<";
|
|
|
|
else if (c == '>')
|
|
|
|
s += ">";
|
|
|
|
else if (c == '&')
|
|
|
|
s += "&";
|
|
|
|
else if (c == '"')
|
|
|
|
s += """;
|
|
|
|
else if (c == ' ')
|
|
|
|
s += " ";
|
|
|
|
else if (c > 32 && c < 127)
|
|
|
|
s += c;
|
|
|
|
else
|
|
|
|
s += format("<span class=\"binary\">[%02x]</span>", c);
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Writes every key/value pair in `range` of database `cx` to `outputFilename` as a simple HTML page.
//
// The file is (re)opened and rewritten from scratch on every transaction attempt; keys and values are
// escaped with toHTML(). Pairs are read in pages of 1000 at a single read version.
// Throws platform_error() if the output file cannot be opened. Any error that escapes the retry loop is
// logged as DumpDatabaseError and rethrown.
ACTOR Future<Void> dumpDatabase(Database cx, std::string outputFilename, KeyRange range = allKeys) {
	try {
		state Transaction tr(cx);
		loop {
			state FILE* output = fopen(outputFilename.c_str(), "wt");
			// A failed open must not reach fprintf/fclose with a null stream. Thrown outside the inner try
			// so it is reported by the outer handler rather than retried.
			if (output == nullptr) {
				throw platform_error();
			}
			try {
				state KeySelectorRef iter = firstGreaterOrEqual(range.begin);
				state Arena arena;
				fprintf(output, "<html><head><style type=\"text/css\">.binary {color:red}</style></head><body>\n");
				Version ver = wait(tr.getReadVersion());
				fprintf(output, "<h3>Database version: %" PRId64 "</h3>", ver);

				loop {
					RangeResult results = wait(tr.getRange(iter, firstGreaterOrEqual(range.end), 1000));
					for (int r = 0; r < results.size(); r++) {
						std::string key = toHTML(results[r].key), value = toHTML(results[r].value);
						fprintf(output, "<p>%s <b>:=</b> %s</p>\n", key.c_str(), value.c_str());
					}
					// A short page means the range is exhausted
					if (results.size() < 1000)
						break;
					// Copy the last key into `arena` so the selector outlives this page's results
					iter = firstGreaterThan(KeyRef(arena, results[results.size() - 1].key));
				}
				fprintf(output, "</body></html>");
				fclose(output);
				TraceEvent("DatabaseDumped").detail("Filename", outputFilename);
				return Void();
			} catch (Error& e) {
				// Close the partial file before retrying; the next attempt reopens and truncates it
				fclose(output);
				wait(tr.onError(e));
			}
		}
	} catch (Error& e) {
		TraceEvent(SevError, "DumpDatabaseError").error(e).detail("Filename", outputFilename);
		throw;
	}
}
|
|
|
|
|
|
|
|
// Forward declarations; no definition for either function appears in this part of the file.
void memoryTest();
void skipListTest();
|
|
|
|
|
2020-10-31 02:20:40 +08:00
|
|
|
// Records this process's machine state (data folder, locality ids, local IP, version) for the system
// monitor, runs the monitor once immediately, and returns a future for the recurring monitor task that
// fires every SERVER_KNOBS->SYSTEM_MONITOR_FREQUENCY at TaskPriority::FlushTrace.
Future<Void> startSystemMonitor(std::string dataFolder,
                                Optional<Standalone<StringRef>> dcId,
                                Optional<Standalone<StringRef>> zoneId,
                                Optional<Standalone<StringRef>> machineId,
                                Optional<Standalone<StringRef>> datahallId) {
	const auto localIp = g_network->getLocalAddress().ip;
	initializeSystemMonitorMachineState(
	    SystemMonitorMachineState(dataFolder, dcId, zoneId, machineId, datahallId, localIp, FDB_VT_VERSION));

	// First sample right away, then hand back the periodic task
	systemMonitor();
	Future<Void> periodicMonitor =
	    recurring(&systemMonitor, SERVER_KNOBS->SYSTEM_MONITOR_FREQUENCY, TaskPriority::FlushTrace);
	return periodicMonitor;
}
|
|
|
|
|
|
|
|
// Forward declaration; no definition appears in this part of the file.
void testIndexedSet();
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
void parentWatcher(void* parentHandle) {
|
|
|
|
HANDLE parent = (HANDLE)parentHandle;
|
|
|
|
int signal = WaitForSingleObject(parent, INFINITE);
|
|
|
|
CloseHandle(parentHandle);
|
|
|
|
if (signal == WAIT_OBJECT_0)
|
|
|
|
criticalError(FDB_EXIT_SUCCESS, "ParentProcessExited", "Parent process exited");
|
|
|
|
TraceEvent(SevError, "ParentProcessWaitFailed").detail("RetCode", signal).GetLastError();
|
|
|
|
}
|
2019-04-26 03:49:14 +08:00
|
|
|
#else
|
|
|
|
void* parentWatcher(void* arg) {
|
|
|
|
int* parent_pid = (int*)arg;
|
|
|
|
while (1) {
|
|
|
|
sleep(1);
|
|
|
|
if (getppid() != *parent_pid)
|
|
|
|
criticalError(FDB_EXIT_SUCCESS, "ParentProcessExited", "Parent process exited");
|
|
|
|
}
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
#endif
|
|
|
|
|
2020-09-11 02:06:56 +08:00
|
|
|
// Prints the JSON build information string to stdout, with no trailing newline added.
static void printBuildInformation() {
	const auto buildInfo = jsonBuildInformation();
	printf("%s", buildInfo.c_str());
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
static void printVersion() {
|
|
|
|
printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
|
2019-11-16 04:26:51 +08:00
|
|
|
printf("source version %s\n", getSourceVersion());
|
2022-08-08 23:29:49 +08:00
|
|
|
printf("protocol %" PRIx64 "\n", currentProtocolVersion().version());
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Prints a one-line hint to stderr telling the user to run `name --help`.
// `name` is the program name to show in the hint.
static void printHelpTeaser(const char* name) {
	fprintf(stderr, "Try `%s --help' for more information.\n", name);
}
|
|
|
|
|
2020-09-22 11:19:15 +08:00
|
|
|
// Prints one help entry to stdout: the option text on its own line, followed by the description
// word-wrapped so that no description line exceeds WIDTH characters (indent included).
//
// Both arguments are trimmed of surrounding whitespace first, and any run of whitespace inside the
// description (including newlines) is treated as a single word separator. If the description is empty
// after trimming, only the option line is printed.
static void printOptionUsage(std::string option, std::string description) {
	static const std::string OPTION_INDENT(" ");
	static const std::string DESCRIPTION_INDENT(" ");
	static const int WIDTH = 80;

	boost::algorithm::trim(option);
	boost::algorithm::trim(description);

	std::string result = OPTION_INDENT + option + "\n";

	std::stringstream sstream(description);
	std::string currWord;

	// eof() is not set on a freshly constructed stream, so test the first extraction itself to detect an
	// empty description.
	if (!(sstream >> currWord)) {
		printf("%s", result.c_str());
		return;
	}

	std::string currLine(DESCRIPTION_INDENT + ' ' + currWord);

	// Loop on the extraction result so a failed read can never re-append the previous word
	while (sstream >> currWord) {
		const int currLength = static_cast<int>(currLine.size());
		if (currLength + static_cast<int>(currWord.size()) + 1 > WIDTH) {
			// Word does not fit: flush the current line and start a new one with this word
			result += currLine + '\n';
			currLine = DESCRIPTION_INDENT + ' ' + currWord;
		} else {
			currLine += ' ' + currWord;
		}
	}
	result += currLine + '\n';

	printf("%s", result.c_str());
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
static void printUsage(const char* name, bool devhelp) {
|
|
|
|
printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
|
|
|
|
printf("Usage: %s -p ADDRESS [OPTIONS]\n\n", name);
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-p ADDRESS, --public-address ADDRESS",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Public address, specified as `IP_ADDRESS:PORT' or `auto:PORT'.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-l ADDRESS, --listen-address ADDRESS",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Listen address, specified as `IP_ADDRESS:PORT' (defaults to"
|
|
|
|
" public address).");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-C CONNFILE, --cluster-file CONNFILE",
|
2020-09-24 03:03:09 +08:00
|
|
|
" The path of a file containing the connection string for the"
|
|
|
|
" FoundationDB cluster. The default is first the value of the"
|
|
|
|
" FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',"
|
|
|
|
" then `" +
|
|
|
|
platform::getDefaultClusterFilePath() + "'.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--seed-cluster-file SEEDCONNFILE",
|
2020-09-24 03:03:09 +08:00
|
|
|
" The path of a seed cluster file which will be used to connect"
|
|
|
|
" if the -C cluster file does not exist. If the server connects"
|
|
|
|
" successfully using the seed file, then it copies the file to"
|
|
|
|
" the -C file location.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--seed-connection-string SEEDCONNSTRING",
|
2020-09-24 03:03:09 +08:00
|
|
|
" The path of a seed connection string which will be used to connect"
|
|
|
|
" if the -C cluster file does not exist. If the server connects"
|
|
|
|
" successfully using the seed string, then it copies the string to"
|
|
|
|
" the -C file location.");
|
2017-05-26 04:48:44 +08:00
|
|
|
#ifdef __linux__
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--data-filesystem PATH",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Turns on validation that all data files are written to a drive"
|
|
|
|
" mounted at the specified PATH. This checks that the device at PATH"
|
|
|
|
" is currently mounted and that any data files get written to the"
|
|
|
|
" same device.");
|
2017-05-26 04:48:44 +08:00
|
|
|
#endif
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("-d PATH, --datadir PATH",
|
|
|
|
" Store data files in the given folder (must be unique for each"
|
|
|
|
" fdbserver instance on a given machine).");
|
|
|
|
printOptionUsage("-L PATH, --logdir PATH", " Store log files in the given folder (default is `.').");
|
|
|
|
printOptionUsage("--logsize SIZE",
|
|
|
|
"Roll over to a new log file after the current log file"
|
|
|
|
" exceeds SIZE bytes. The default value is 10MiB.");
|
|
|
|
printOptionUsage("--maxlogs SIZE, --maxlogssize SIZE",
|
|
|
|
" Delete the oldest log file when the total size of all log"
|
|
|
|
" files exceeds SIZE bytes. If set to 0, old log files will not"
|
|
|
|
" be deleted. The default value is 100MiB.");
|
|
|
|
printOptionUsage("--loggroup LOG_GROUP",
|
|
|
|
" Sets the LogGroup field with the specified value for all"
|
|
|
|
" events in the trace output (defaults to `default').");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--trace-format FORMAT",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Select the format of the log files. xml (the default) and json"
|
|
|
|
" are supported.");
|
|
|
|
printOptionUsage("--tracer TRACER",
|
|
|
|
" Select a tracer for transaction tracing. Currently disabled"
|
|
|
|
" (the default) and log_file are supported.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-i ID, --machine-id ID",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Machine and zone identifier key (up to 16 hex characters)."
|
|
|
|
" Defaults to a random value shared by all fdbserver processes"
|
|
|
|
" on this machine.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-a ID, --datacenter-id ID", " Data center identifier key (up to 16 hex characters).");
|
|
|
|
printOptionUsage("--locality-LOCALITYKEY LOCALITYVALUE",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Define a locality key. LOCALITYKEY is case-insensitive though"
|
|
|
|
" LOCALITYVALUE is not.");
|
|
|
|
printOptionUsage("-m SIZE, --memory SIZE",
|
2022-04-07 11:06:24 +08:00
|
|
|
" Resident memory limit. The default value is 8GiB. When specified"
|
|
|
|
" without a unit, MiB is assumed.");
|
|
|
|
printOptionUsage("--memory-vsize SIZE",
|
|
|
|
" Virtual memory limit. The default value is unlimited. When specified"
|
2020-09-24 03:03:09 +08:00
|
|
|
" without a unit, MiB is assumed.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-M SIZE, --storage-memory SIZE",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Maximum amount of memory used for storage. The default"
|
|
|
|
" value is 1GiB. When specified without a unit, MB is"
|
|
|
|
" assumed.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--cache-memory SIZE",
|
2020-09-24 03:03:09 +08:00
|
|
|
" The amount of memory to use for caching disk pages."
|
|
|
|
" The default value is 2GiB. When specified without a unit,"
|
|
|
|
" MiB is assumed.");
|
|
|
|
printOptionUsage("-c CLASS, --class CLASS",
|
|
|
|
" Machine class (valid options are storage, transaction,"
|
|
|
|
" resolution, grv_proxy, commit_proxy, master, test, unset, stateless, log, router,"
|
|
|
|
" and cluster_controller).");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--profiler-",
|
2021-06-04 06:10:04 +08:00
|
|
|
"Set an actor profiler option. Supported options are:\n"
|
|
|
|
" collector -- None or FluentD (FluentD requires collector_endpoint to be set)\n"
|
|
|
|
" collector_endpoint -- IP:PORT of the fluentd server\n"
|
|
|
|
" collector_protocol -- UDP or TCP (default is UDP)");
|
2022-04-21 01:00:46 +08:00
|
|
|
printf("%s", TLS_HELP);
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("-v, --version", "Print version information and exit.");
|
|
|
|
printOptionUsage("-h, -?, --help", "Display this help and exit.");
|
2017-05-26 04:48:44 +08:00
|
|
|
if (devhelp) {
|
2021-12-15 00:44:39 +08:00
|
|
|
printf(" --build-flags Print build information and exit.\n");
|
2022-01-28 11:16:25 +08:00
|
|
|
printOptionUsage("-r ROLE, --role ROLE",
|
|
|
|
" Server role (valid options are fdbd, test, multitest,"
|
|
|
|
" simulation, networktestclient, networktestserver, restore"
|
|
|
|
" consistencycheck, kvfileintegritycheck, kvfilegeneratesums, kvfiledump, unittests)."
|
|
|
|
" The default is `fdbd'.");
|
2017-05-26 04:48:44 +08:00
|
|
|
#ifdef _WIN32
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("-n, --newconsole", " Create a new console.");
|
2021-12-15 01:59:14 +08:00
|
|
|
printOptionUsage("-q, --no-dialog", " Disable error dialog on crash.");
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("--parentpid PID", " Specify a process after whose termination to exit.");
|
2017-05-26 04:48:44 +08:00
|
|
|
#endif
|
2021-04-05 12:36:05 +08:00
|
|
|
printOptionUsage("-f TESTFILE, --testfile",
|
|
|
|
" Testfile to run, defaults to `tests/default.txt'. If role is `unittests', specifies which "
|
|
|
|
"unit tests to run as a search prefix.");
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("-R, --restarting", " Restart a previous simulation that was cleanly shut down.");
|
|
|
|
printOptionUsage("-s SEED, --seed SEED", " Random seed.");
|
|
|
|
printOptionUsage("-k KEY, --key KEY", "Target key for search role.");
|
2022-01-28 11:16:25 +08:00
|
|
|
printOptionUsage("--kvfile FILE",
|
|
|
|
"Input file (SQLite database file) for use by the 'kvfilegeneratesums', "
|
|
|
|
"'kvfileintegritycheck' and 'kvfiledump' roles.");
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("-b [on,off], --buggify [on,off]", " Sets Buggify system state, defaults to `off'.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("-fi [on,off], --fault-injection [on,off]", " Sets fault injection, defaults to `on'.");
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("--crash", "Crash on serious errors instead of continuing.");
|
|
|
|
printOptionUsage("-N NETWORKIMPL, --network NETWORKIMPL",
|
|
|
|
" Select network implementation, `net2' (default),"
|
|
|
|
" `net2-threadpool'.");
|
|
|
|
printOptionUsage("--unbufferedout", " Do not buffer stdout and stderr.");
|
|
|
|
printOptionUsage("--bufferedout", " Buffer stdout and stderr.");
|
2023-02-03 02:30:31 +08:00
|
|
|
printOptionUsage("--traceclock CLOCKIMPL",
|
|
|
|
" Select clock source for trace files, `now' (default) or"
|
|
|
|
" `realtime'.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--num-testers NUM",
|
2020-09-24 03:03:09 +08:00
|
|
|
" A multitester will wait for NUM testers before starting"
|
|
|
|
" (defaults to 1).");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--test-PARAMNAME PARAMVALUE",
|
2021-04-05 12:36:05 +08:00
|
|
|
" Set a UnitTest named parameter to the given value. Names are case sensitive.");
|
2019-04-10 04:26:12 +08:00
|
|
|
#ifdef __linux__
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("--rsssize SIZE",
|
|
|
|
" Turns on automatic heap profiling when RSS memory size exceeds"
|
|
|
|
" the given threshold. fdbserver needs to be compiled with"
|
|
|
|
" USE_GPERFTOOLS flag in order to use this feature.");
|
2019-04-10 04:26:12 +08:00
|
|
|
#endif
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("--testservers ADDRESSES",
|
|
|
|
" The addresses of networktestservers"
|
|
|
|
" specified as ADDRESS:PORT,ADDRESS:PORT...");
|
|
|
|
printOptionUsage("--testonservers", " Testers are recruited on servers.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--metrics-cluster CONNFILE",
|
2020-09-24 03:03:09 +08:00
|
|
|
" The cluster file designating where this process will"
|
|
|
|
" store its metric data. By default metrics will be stored"
|
|
|
|
" in the same database the process is participating in.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--metrics-prefix PREFIX",
|
2020-09-24 03:03:09 +08:00
|
|
|
" The prefix where this process will store its metric data."
|
|
|
|
" Must be specified if using a different database for metrics.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--knob-KNOBNAME KNOBVALUE", " Changes a database knob. KNOBNAME should be lowercase.");
|
|
|
|
printOptionUsage("--io-trust-seconds SECONDS",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Sets the time in seconds that a read or write operation is allowed to take"
|
|
|
|
" before timing out with an error. If an operation times out, all future"
|
|
|
|
" operations on that file will fail with an error as well. Only has an effect"
|
|
|
|
" when using AsyncFileKAIO in Linux.");
|
2021-12-15 00:44:39 +08:00
|
|
|
printOptionUsage("--io-trust-warn-only",
|
2020-09-24 03:03:09 +08:00
|
|
|
" Instead of failing when an I/O operation exceeds io_trust_seconds, just"
|
|
|
|
" log a warning to the trace log. Has no effect if io_trust_seconds is unspecified.");
|
2022-08-08 23:29:49 +08:00
|
|
|
printOptionUsage("--use-future-protocol-version [true,false]",
|
|
|
|
" Run the process with a simulated future protocol version."
|
|
|
|
" This option can be used testing purposes only!");
|
2022-01-28 10:52:00 +08:00
|
|
|
printf("\n"
|
|
|
|
"The 'kvfiledump' role dump all key-values from kvfile to stdout in binary format:\n"
|
|
|
|
"{key length}{key binary}{value length}{value binary}, length is 4 bytes int\n"
|
2022-01-28 12:04:02 +08:00
|
|
|
"(little endianness). This role takes 3 environment variables as parameters:\n"
|
2022-01-28 10:52:00 +08:00
|
|
|
" - FDB_DUMP_STARTKEY: start key for the dump, default is empty\n"
|
|
|
|
" - FDB_DUMP_ENDKEY: end key for the dump, default is \"\\xff\\xff\"\n"
|
|
|
|
" - FDB_DUMP_DEBUG: print key-values to stderr in escaped format\n");
|
2022-07-26 13:12:28 +08:00
|
|
|
|
|
|
|
printf(
|
|
|
|
"\n"
|
|
|
|
"The 'changedescription' role replaces the old cluster key in all coordinators' data file to the specified "
|
|
|
|
"new cluster key,\n"
|
|
|
|
"which is passed in by '--new-cluster-key'. In particular, cluster key means '[description]:[id]'.\n"
|
|
|
|
"'--datadir' is supposed to point to the top level directory of FDB's data, where subdirectories are for "
|
|
|
|
"each process's data.\n"
|
|
|
|
"The given cluster file passed in by '-C, --cluster-file' is considered to contain the old cluster key.\n"
|
|
|
|
"It is used before restoring a snapshotted cluster to let the cluster have a different cluster key.\n"
|
|
|
|
"Please make sure run it on every host in the cluster with the same '--new-cluster-key'.\n");
|
2022-08-08 23:29:49 +08:00
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
} else {
|
2020-09-24 03:03:09 +08:00
|
|
|
printOptionUsage("--dev-help", "Display developer-specific help and exit.");
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
printf("\n"
|
|
|
|
"SIZE parameters may use one of the multiplicative suffixes B=1, KB=10^3,\n"
|
|
|
|
"KiB=2^10, MB=10^6, MiB=2^20, GB=10^9, GiB=2^30, TB=10^12, or TiB=2^40.\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
extern bool g_crashOnError;
|
|
|
|
|
|
|
|
#if defined(ALLOC_INSTRUMENTATION) || defined(ALLOC_INSTRUMENTATION_STDOUT)
|
2019-10-09 06:50:47 +08:00
|
|
|
// Scalar throwing new used by the allocation-instrumentation build: forwards
// to malloc(), reports the block through recordAllocation(), and throws
// std::bad_alloc when malloc() returns null (nothing is recorded in that case).
void* operator new(std::size_t size) {
	void* const block = malloc(size);
	if (block == nullptr) {
		throw std::bad_alloc();
	}
	recordAllocation(block, size);
	return block;
}
|
|
|
|
// Scalar delete matching the instrumented throwing new: reports the pointer
// through recordDeallocation() and then releases it with free().
// Declared noexcept; the old `throw()` dynamic exception specification was
// deprecated in C++11 and removed from the language in C++20.
// NOTE(review): recordDeallocation() is also invoked for null pointers, as
// before -- confirm it tolerates nullptr.
void operator delete(void* ptr) noexcept {
	recordDeallocation(ptr);
	free(ptr);
}
|
|
|
|
|
|
|
|
// scalar, nothrow new and its matching delete
|
|
|
|
// Scalar nothrow new for the allocation-instrumentation build: forwards to
// malloc() and returns null on failure instead of throwing.
// Only successful allocations are reported through recordAllocation(), which
// matches the throwing overload (it throws before recording anything).
// Declared noexcept; the old `throw()` dynamic exception specification was
// deprecated in C++11 and removed from the language in C++20.
void* operator new(std::size_t size, const std::nothrow_t&) noexcept {
	void* p = malloc(size);
	if (p) {
		recordAllocation(p, size);
	}
	return p;
}
|
|
|
|
// Scalar delete matching the instrumented nothrow new: reports the pointer
// through recordDeallocation() and then releases it with free().
// Declared noexcept; the old `throw()` dynamic exception specification was
// deprecated in C++11 and removed from the language in C++20.
// NOTE(review): recordDeallocation() is also invoked for null pointers, as
// before -- confirm it tolerates nullptr.
void operator delete(void* ptr, const std::nothrow_t&) noexcept {
	recordDeallocation(ptr);
	free(ptr);
}
|
|
|
|
|
|
|
|
// array throwing new and matching delete[]
|
2019-10-09 06:50:47 +08:00
|
|
|
// Array throwing new used by the allocation-instrumentation build: forwards
// to malloc(), reports the block through recordAllocation(), and throws
// std::bad_alloc when malloc() returns null (nothing is recorded in that case).
void* operator new[](std::size_t size) {
	void* const block = malloc(size);
	if (block == nullptr) {
		throw std::bad_alloc();
	}
	recordAllocation(block, size);
	return block;
}
|
|
|
|
// Array delete matching the instrumented throwing new[]: reports the pointer
// through recordDeallocation() and then releases it with free().
// Declared noexcept; the old `throw()` dynamic exception specification was
// deprecated in C++11 and removed from the language in C++20.
// NOTE(review): recordDeallocation() is also invoked for null pointers, as
// before -- confirm it tolerates nullptr.
void operator delete[](void* ptr) noexcept {
	recordDeallocation(ptr);
	free(ptr);
}
|
|
|
|
|
|
|
|
// array, nothrow new and matching delete[]
|
|
|
|
// Array nothrow new for the allocation-instrumentation build: forwards to
// malloc() and returns null on failure instead of throwing.
// Only successful allocations are reported through recordAllocation(), which
// matches the throwing overload (it throws before recording anything).
// Declared noexcept; the old `throw()` dynamic exception specification was
// deprecated in C++11 and removed from the language in C++20.
void* operator new[](std::size_t size, const std::nothrow_t&) noexcept {
	void* p = malloc(size);
	if (p) {
		recordAllocation(p, size);
	}
	return p;
}
|
|
|
|
// Array delete matching the instrumented nothrow new[]: reports the pointer
// through recordDeallocation() and then releases it with free().
// Declared noexcept; the old `throw()` dynamic exception specification was
// deprecated in C++11 and removed from the language in C++20.
// NOTE(review): recordDeallocation() is also invoked for null pointers, as
// before -- confirm it tolerates nullptr.
void operator delete[](void* ptr, const std::nothrow_t&) noexcept {
	recordDeallocation(ptr);
	free(ptr);
}
|
|
|
|
#endif
|
|
|
|
|
2017-09-16 08:55:01 +08:00
|
|
|
// Scans `testFile` line by line for a `buggify=<value>` attribute and returns
// the override it requests:
//   - `on` / `true`   -> true
//   - `off` / `false` -> false
//   - any other value -> prints an error and calls flushAndExit(FDB_EXIT_ERROR)
// Lines that are empty after whitespace removal, start with ';', or contain no
// '=' are ignored. Returns an empty Optional when no buggify attribute is found.
Optional<bool> checkBuggifyOverride(const char* testFile) {
	std::ifstream input(testFile, std::ifstream::in);
	if (!input.good()) {
		// NOTE(review): the literal 0 converts to Optional<bool>; presumably this
		// yields a present `false` rather than an empty Optional -- confirm that
		// an unreadable test file is meant to behave this way.
		return 0;
	}

	std::string rawLine;
	while (std::getline(input, rawLine)) {
		const std::string line = removeWhitespace(std::string(rawLine));
		if (line.empty() || line.find(';') == 0) {
			continue;
		}

		const size_t separator = line.find('=');
		if (separator == std::string::npos) {
			// hmmm, not good
			continue;
		}

		const std::string attrib = removeWhitespace(line.substr(0, separator));
		const std::string value = removeWhitespace(line.substr(separator + 1));
		if (attrib != "buggify") {
			continue;
		}

		// Testspec uses `on` or `off` (without quotes).
		// TOML uses literal `true` and `false`.
		const bool wantsOn = !strcmp(value.c_str(), "on") || !strcmp(value.c_str(), "true");
		if (wantsOn) {
			input.close();
			return true;
		}

		const bool wantsOff = !strcmp(value.c_str(), "off") || !strcmp(value.c_str(), "false");
		if (wantsOff) {
			input.close();
			return false;
		}

		fprintf(stderr, "ERROR: Unknown buggify override state `%s'\n", value.c_str());
		flushAndExit(FDB_EXIT_ERROR);
	}

	input.close();
	return Optional<bool>();
}
|
|
|
|
|
2018-10-23 08:57:09 +08:00
|
|
|
// Takes a vector of public and listen address strings given via command line, and returns vector of NetworkAddress
|
|
|
|
// objects.
|
2021-09-17 08:42:34 +08:00
|
|
|
std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
|
2022-01-28 09:43:56 +08:00
|
|
|
IClusterConnectionRecord& connectionRecord,
|
2021-09-17 08:42:34 +08:00
|
|
|
const std::vector<std::string>& publicAddressStrs,
|
|
|
|
std::vector<std::string>& listenAddressStrs) {
|
2019-02-14 07:37:31 +08:00
|
|
|
if (listenAddressStrs.size() > 0 && publicAddressStrs.size() != listenAddressStrs.size()) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: Listen addresses (if provided) should be equal to the number of public addresses in order.\n");
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
listenAddressStrs.resize(publicAddressStrs.size(), "public");
|
|
|
|
|
|
|
|
if (publicAddressStrs.size() > 2) {
|
|
|
|
fprintf(stderr, "ERROR: maximum 2 public/listen addresses are allowed\n");
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
2018-10-23 08:57:09 +08:00
|
|
|
NetworkAddressList publicNetworkAddresses;
|
|
|
|
NetworkAddressList listenNetworkAddresses;
|
|
|
|
|
2022-04-28 12:54:13 +08:00
|
|
|
std::vector<Hostname>& hostnames = connectionRecord.getConnectionString().hostnames;
|
2022-05-24 02:42:56 +08:00
|
|
|
const std::vector<NetworkAddress>& coords = connectionRecord.getConnectionString().coords;
|
2022-04-28 12:54:13 +08:00
|
|
|
ASSERT(hostnames.size() + coords.size() > 0);
|
2018-10-23 08:57:09 +08:00
|
|
|
|
|
|
|
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
|
|
|
|
const std::string& publicAddressStr = publicAddressStrs[ii];
|
2022-09-20 02:35:58 +08:00
|
|
|
bool autoPublicAddress = StringRef(publicAddressStr).startsWith("auto:"_sr);
|
2019-03-24 08:54:46 +08:00
|
|
|
NetworkAddress currentPublicAddress;
|
2018-10-23 08:57:09 +08:00
|
|
|
if (autoPublicAddress) {
|
|
|
|
try {
|
|
|
|
const NetworkAddress& parsedAddress = NetworkAddress::parse("0.0.0.0:" + publicAddressStr.substr(5));
|
2023-03-18 09:07:03 +08:00
|
|
|
const IPAddress publicIP = connectionRecord.getConnectionString().determineLocalSourceIP();
|
2019-03-24 08:54:46 +08:00
|
|
|
currentPublicAddress = NetworkAddress(publicIP, parsedAddress.port, true, parsedAddress.isTLS());
|
2018-10-23 08:57:09 +08:00
|
|
|
} catch (Error& e) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: could not determine public address automatically from `%s': %s\n",
|
|
|
|
publicAddressStr.c_str(),
|
|
|
|
e.what());
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
try {
|
2019-03-24 08:54:46 +08:00
|
|
|
currentPublicAddress = NetworkAddress::parse(publicAddressStr);
|
2018-10-23 08:57:09 +08:00
|
|
|
} catch (Error&) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: Could not parse network address `%s' (specify as IP_ADDRESS:PORT)\n",
|
|
|
|
publicAddressStr.c_str());
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-24 08:54:46 +08:00
|
|
|
if (ii == 0) {
|
|
|
|
publicNetworkAddresses.address = currentPublicAddress;
|
|
|
|
} else {
|
|
|
|
publicNetworkAddresses.secondaryAddress = currentPublicAddress;
|
|
|
|
}
|
|
|
|
|
2019-02-27 13:03:34 +08:00
|
|
|
if (!currentPublicAddress.isValid()) {
|
2019-03-05 14:13:47 +08:00
|
|
|
fprintf(stderr, "ERROR: %s is not a valid IP address\n", currentPublicAddress.toString().c_str());
|
2019-02-14 07:37:31 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
2018-10-23 08:57:09 +08:00
|
|
|
const std::string& listenAddressStr = listenAddressStrs[ii];
|
2019-03-24 08:54:46 +08:00
|
|
|
NetworkAddress currentListenAddress;
|
2018-10-23 08:57:09 +08:00
|
|
|
if (listenAddressStr == "public") {
|
2019-03-24 08:54:46 +08:00
|
|
|
currentListenAddress = currentPublicAddress;
|
2018-10-23 08:57:09 +08:00
|
|
|
} else {
|
|
|
|
try {
|
2019-03-24 08:54:46 +08:00
|
|
|
currentListenAddress = NetworkAddress::parse(listenAddressStr);
|
2018-10-23 08:57:09 +08:00
|
|
|
} catch (Error&) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: Could not parse network address `%s' (specify as IP_ADDRESS:PORT)\n",
|
|
|
|
listenAddressStr.c_str());
|
|
|
|
throw;
|
|
|
|
}
|
2019-02-14 07:37:31 +08:00
|
|
|
|
2019-03-24 08:54:46 +08:00
|
|
|
if (currentListenAddress.isTLS() != currentPublicAddress.isTLS()) {
|
2019-02-14 07:37:31 +08:00
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: TLS state of listen address: %s is not equal to the TLS state of public address: %s.\n",
|
|
|
|
listenAddressStr.c_str(),
|
|
|
|
publicAddressStr.c_str());
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2018-10-23 08:57:09 +08:00
|
|
|
}
|
|
|
|
|
2019-03-24 08:54:46 +08:00
|
|
|
if (ii == 0) {
|
|
|
|
listenNetworkAddresses.address = currentListenAddress;
|
|
|
|
} else {
|
|
|
|
listenNetworkAddresses.secondaryAddress = currentListenAddress;
|
|
|
|
}
|
|
|
|
|
2022-04-28 12:54:13 +08:00
|
|
|
bool matchCoordinatorsTls = std::all_of(coords.begin(), coords.end(), [&](const NetworkAddress& address) {
|
2019-02-27 13:03:34 +08:00
|
|
|
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
|
|
|
|
return address.isTLS() == currentPublicAddress.isTLS();
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
});
|
2022-04-28 12:54:13 +08:00
|
|
|
// If true, further check hostnames.
|
|
|
|
if (matchCoordinatorsTls) {
|
|
|
|
matchCoordinatorsTls = std::all_of(hostnames.begin(), hostnames.end(), [&](Hostname& hostname) {
|
|
|
|
Optional<NetworkAddress> resolvedAddress = hostname.resolveBlocking();
|
|
|
|
if (resolvedAddress.present()) {
|
|
|
|
NetworkAddress address = resolvedAddress.get();
|
|
|
|
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
|
|
|
|
return address.isTLS() == currentPublicAddress.isTLS();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
});
|
|
|
|
}
|
|
|
|
if (!matchCoordinatorsTls) {
|
2019-02-27 13:03:34 +08:00
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: TLS state of public address %s does not match in coordinator list.\n",
|
|
|
|
publicAddressStr.c_str());
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2018-10-23 08:57:09 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-24 08:54:46 +08:00
|
|
|
if (publicNetworkAddresses.secondaryAddress.present() &&
|
|
|
|
publicNetworkAddresses.address.isTLS() == publicNetworkAddresses.secondaryAddress.get().isTLS()) {
|
2019-02-14 07:37:31 +08:00
|
|
|
fprintf(stderr, "ERROR: only one public address of each TLS state is allowed.\n");
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2018-10-23 08:57:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(publicNetworkAddresses, listenNetworkAddresses);
|
|
|
|
}
|
|
|
|
|
2019-07-16 02:33:40 +08:00
|
|
|
// moves files from 'dirSrc' to 'dirToMove' if their name contains 'role'
|
2019-06-20 02:22:23 +08:00
|
|
|
void restoreRoleFilesHelper(std::string dirSrc, std::string dirToMove, std::string role) {
|
|
|
|
std::vector<std::string> returnFiles = platform::listFiles(dirSrc, "");
|
|
|
|
for (const auto& fileEntry : returnFiles) {
|
|
|
|
if (fileEntry != "fdb.cluster" && fileEntry.find(role) != std::string::npos) {
|
|
|
|
// rename files
|
|
|
|
TraceEvent("RenamingSnapFile")
|
|
|
|
.detail("Oldname", dirSrc + "/" + fileEntry)
|
|
|
|
.detail("Newname", dirToMove + "/" + fileEntry);
|
|
|
|
renameFile(dirSrc + "/" + fileEntry, dirToMove + "/" + fileEntry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
namespace {
|
2020-11-15 15:06:46 +08:00
|
|
|
// The role this fdbserver process runs as. CLIOptions::role defaults to
// ServerRole::FDBD. Enumerator order is kept as is, so the underlying values
// are unchanged.
enum class ServerRole {
	ChangeClusterKey,
	ConsistencyCheck,
	CreateTemplateDatabase,
	DSLTest,
	FDBD,
	FlowProcess,
	KVFileGenerateIOLogChecksums,
	KVFileIntegrityCheck,
	KVFileDump,
	MultiTester,
	NetworkTestClient,
	NetworkTestServer,
	Restore,
	SearchMutations,
	Simulation,
	SkipListTest,
	Test,
	VersionedMapTest,
	UnitTests
};
|
|
|
|
struct CLIOptions {
|
|
|
|
std::string commandLine;
|
2022-07-29 01:13:58 +08:00
|
|
|
std::string fileSystemPath, dataFolder, connFile, seedConnFile, seedConnString,
|
|
|
|
logFolder = ".", metricsConnFile, metricsPrefix, newClusterKey, authzPublicKeyFile;
|
2019-08-06 08:08:42 +08:00
|
|
|
std::string logGroup = "default";
|
2019-08-10 04:09:59 +08:00
|
|
|
uint64_t rollsize = TRACE_DEFAULT_ROLL_SIZE;
|
|
|
|
uint64_t maxLogsSize = TRACE_DEFAULT_MAX_LOGS_SIZE;
|
|
|
|
bool maxLogsSizeSet = false;
|
|
|
|
int maxLogs = 0;
|
|
|
|
bool maxLogsSet = false;
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
ServerRole role = ServerRole::FDBD;
|
2019-08-06 08:08:42 +08:00
|
|
|
uint32_t randomSeed = platform::getRandomSeed();
|
|
|
|
|
|
|
|
const char* testFile = "tests/default.txt";
|
|
|
|
std::string kvFile;
|
|
|
|
std::string testServersStr;
|
|
|
|
std::string whitelistBinPaths;
|
2019-08-10 04:09:59 +08:00
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
std::vector<std::string> publicAddressStrs, listenAddressStrs;
|
2019-08-10 04:09:59 +08:00
|
|
|
NetworkAddressList publicAddresses, listenAddresses;
|
|
|
|
|
2020-08-19 05:18:50 +08:00
|
|
|
const char* targetKey = nullptr;
|
2022-04-07 11:06:24 +08:00
|
|
|
uint64_t memLimit =
|
2019-08-06 08:08:42 +08:00
|
|
|
8LL << 30; // Nice to maintain the same default value for memLimit and SERVER_KNOBS->SERVER_MEM_LIMIT and
|
|
|
|
// SERVER_KNOBS->COMMIT_BATCHES_MEM_BYTES_HARD_LIMIT
|
2022-04-07 11:06:24 +08:00
|
|
|
uint64_t virtualMemLimit = 0; // unlimited
|
2019-08-06 08:08:42 +08:00
|
|
|
uint64_t storageMemLimit = 1LL << 30;
|
2021-07-24 07:28:20 +08:00
|
|
|
bool buggifyEnabled = false, faultInjectionEnabled = true, restarting = false;
|
2019-08-06 08:08:42 +08:00
|
|
|
Optional<Standalone<StringRef>> zoneId;
|
|
|
|
Optional<Standalone<StringRef>> dcId;
|
|
|
|
ProcessClass processClass = ProcessClass(ProcessClass::UnsetClass, ProcessClass::CommandLineSource);
|
|
|
|
bool useNet2 = true;
|
|
|
|
bool useThreadPool = false;
|
|
|
|
std::vector<std::pair<std::string, std::string>> knobs;
|
2021-05-18 10:14:32 +08:00
|
|
|
std::map<std::string, std::string> manualKnobOverrides;
|
2019-08-06 08:08:42 +08:00
|
|
|
LocalityData localities;
|
|
|
|
int minTesterCount = 1;
|
|
|
|
bool testOnServers = false;
|
|
|
|
|
2020-03-07 10:22:46 +08:00
|
|
|
TLSConfig tlsConfig = TLSConfig(TLSEndpointType::SERVER);
|
2019-08-06 08:08:42 +08:00
|
|
|
double fileIoTimeout = 0.0;
|
|
|
|
bool fileIoWarnOnly = false;
|
|
|
|
uint64_t rsssize = -1;
|
2020-06-05 02:18:42 +08:00
|
|
|
std::vector<std::string> blobCredentials; // used for fast restore workers & backup workers
|
2020-01-29 09:53:41 +08:00
|
|
|
const char* blobCredsFromENV = nullptr;
|
2019-08-06 08:08:42 +08:00
|
|
|
|
2021-05-13 01:12:37 +08:00
|
|
|
std::string configPath;
|
2022-06-08 05:49:02 +08:00
|
|
|
ConfigDBType configDBType{ ConfigDBType::PAXOS };
|
2021-05-12 09:23:33 +08:00
|
|
|
|
2021-10-11 11:44:56 +08:00
|
|
|
Reference<IClusterConnectionRecord> connectionFile;
|
2019-08-06 08:08:42 +08:00
|
|
|
Standalone<StringRef> machineId;
|
2021-04-06 17:36:10 +08:00
|
|
|
UnitTestParameters testParams;
|
2019-08-06 08:08:42 +08:00
|
|
|
|
2021-06-04 06:10:04 +08:00
|
|
|
std::map<std::string, std::string> profilerConfig;
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
std::string flowProcessName;
|
|
|
|
Endpoint flowProcessEndpoint;
|
2021-11-24 04:10:20 +08:00
|
|
|
bool printSimTime = false;
|
2022-02-18 23:48:44 +08:00
|
|
|
IPAllowList allowList;
|
2021-06-04 06:10:04 +08:00
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
// Builds a CLIOptions by first parsing the command line (argc/argv) and then
// the environment knob options, in that order.
static CLIOptions parseArgs(int argc, char* argv[]) {
	CLIOptions parsed;
	// Command-line arguments first ...
	parsed.parseArgsInternal(argc, argv);
	// ... then knob options taken from the environment.
	parsed.parseEnvInternal();
	return parsed;
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2022-02-23 06:44:08 +08:00
|
|
|
// Determine publicAddresses and listenAddresses by calling buildNetworkAddresses().
|
2022-02-23 03:15:44 +08:00
|
|
|
void buildNetwork(const char* name) {
|
|
|
|
try {
|
|
|
|
if (!publicAddressStrs.empty()) {
|
|
|
|
std::tie(publicAddresses, listenAddresses) =
|
|
|
|
buildNetworkAddresses(*connectionFile, publicAddressStrs, listenAddressStrs);
|
|
|
|
}
|
|
|
|
} catch (Error&) {
|
|
|
|
printHelpTeaser(name);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (role == ServerRole::ConsistencyCheck) {
|
|
|
|
if (!publicAddressStrs.empty()) {
|
|
|
|
fprintf(stderr, "ERROR: Public address cannot be specified for consistency check processes\n");
|
|
|
|
printHelpTeaser(name);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2023-03-18 09:07:03 +08:00
|
|
|
auto publicIP = connectionFile->getConnectionString().determineLocalSourceIP();
|
2022-02-23 03:15:44 +08:00
|
|
|
publicAddresses.address = NetworkAddress(publicIP, ::getpid());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
private:
|
|
|
|
CLIOptions() = default;
|
2018-06-21 00:21:23 +08:00
|
|
|
|
2022-08-03 02:29:37 +08:00
|
|
|
void parseEnvInternal() {
|
2022-08-29 20:15:03 +08:00
|
|
|
for (const std::string& knob : getEnvironmentKnobOptions()) {
|
2022-08-24 05:00:10 +08:00
|
|
|
auto pos = knob.find_first_of("=");
|
|
|
|
if (pos == std::string::npos) {
|
2022-08-03 02:29:37 +08:00
|
|
|
fprintf(stderr,
|
|
|
|
"Error: malformed environment knob option: %s%s\n",
|
|
|
|
ENVIRONMENT_KNOB_OPTION_PREFIX,
|
|
|
|
knob.c_str());
|
2022-08-26 00:53:36 +08:00
|
|
|
TraceEvent(SevWarnAlways, "MalformedEnvironmentVariableKnob")
|
2022-08-25 01:40:22 +08:00
|
|
|
.detail("Key", ENVIRONMENT_KNOB_OPTION_PREFIX + knob);
|
2022-08-03 02:29:37 +08:00
|
|
|
} else {
|
2022-08-24 05:00:10 +08:00
|
|
|
std::string k = knob.substr(0, pos);
|
|
|
|
std::string v = knob.substr(pos + 1, knob.length());
|
|
|
|
knobs.emplace_back(k, v);
|
|
|
|
manualKnobOverrides[k] = v;
|
2022-08-03 02:29:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
void parseArgsInternal(int argc, char* argv[]) {
|
|
|
|
for (int a = 0; a < argc; a++) {
|
2017-05-26 04:48:44 +08:00
|
|
|
if (a)
|
|
|
|
commandLine += ' ';
|
|
|
|
commandLine += argv[a];
|
|
|
|
}
|
|
|
|
|
2021-12-15 00:44:39 +08:00
|
|
|
CSimpleOpt args(argc, argv, g_rgOptions, SO_O_EXACT | SO_O_HYPHEN_TO_UNDERSCORE);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
if (argc == 1) {
|
2017-05-26 04:48:44 +08:00
|
|
|
printUsage(argv[0], false);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
while (args.Next()) {
|
|
|
|
if (args.LastError() == SO_ARG_INVALID_DATA) {
|
|
|
|
fprintf(stderr, "ERROR: invalid argument to option `%s'\n", args.OptionText());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
if (args.LastError() == SO_ARG_INVALID) {
|
|
|
|
fprintf(stderr, "ERROR: argument given for option `%s'\n", args.OptionText());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
if (args.LastError() == SO_ARG_MISSING) {
|
|
|
|
fprintf(stderr, "ERROR: missing argument for option `%s'\n", args.OptionText());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
if (args.LastError() == SO_OPT_INVALID) {
|
|
|
|
fprintf(stderr, "ERROR: unknown option: `%s'\n", args.OptionText());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
if (args.LastError() != SO_SUCCESS) {
|
|
|
|
fprintf(stderr, "ERROR: error parsing options\n");
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
const char* sRole;
|
2017-05-26 04:48:44 +08:00
|
|
|
Optional<uint64_t> ti;
|
2019-06-19 09:15:15 +08:00
|
|
|
std::string argStr;
|
2019-06-21 14:03:49 +08:00
|
|
|
std::vector<std::string> tmpStrings;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
switch (args.OptionId()) {
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_HELP:
|
|
|
|
printUsage(argv[0], false);
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
break;
|
|
|
|
case OPT_DEVHELP:
|
|
|
|
printUsage(argv[0], true);
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
break;
|
2022-07-20 04:15:51 +08:00
|
|
|
case OPT_PRINT_CODE_PROBES:
|
|
|
|
probe::ICodeProbe::printProbesJSON({ std::string(args.OptionArg()) });
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
break;
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_KNOB: {
|
2021-12-15 04:01:44 +08:00
|
|
|
Optional<std::string> knobName = extractPrefixedArgument("--knob", args.OptionSyntax());
|
|
|
|
if (!knobName.present()) {
|
|
|
|
fprintf(stderr, "ERROR: unable to parse knob option '%s'\n", args.OptionSyntax());
|
2019-08-06 08:08:42 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-12-15 04:01:44 +08:00
|
|
|
knobs.emplace_back(knobName.get(), args.OptionArg());
|
|
|
|
manualKnobOverrides[knobName.get()] = args.OptionArg();
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
2021-06-04 06:10:04 +08:00
|
|
|
case OPT_PROFILER: {
|
2021-12-15 04:01:44 +08:00
|
|
|
Optional<std::string> profilerArg = extractPrefixedArgument("--profiler", args.OptionSyntax());
|
|
|
|
if (!profilerArg.present()) {
|
|
|
|
fprintf(stderr, "ERROR: unable to parse profiler option '%s'\n", args.OptionSyntax());
|
2021-06-04 06:10:04 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2021-12-15 04:01:44 +08:00
|
|
|
profilerConfig.emplace(profilerArg.get(), args.OptionArg());
|
2021-06-04 06:10:04 +08:00
|
|
|
break;
|
|
|
|
};
|
2021-04-05 12:36:05 +08:00
|
|
|
case OPT_UNITTESTPARAM: {
|
2021-12-15 04:01:44 +08:00
|
|
|
Optional<std::string> testArg = extractPrefixedArgument("--test", args.OptionSyntax());
|
|
|
|
if (!testArg.present()) {
|
|
|
|
fprintf(stderr, "ERROR: unable to parse unit test option '%s'\n", args.OptionSyntax());
|
2021-04-05 12:36:05 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2021-12-15 04:01:44 +08:00
|
|
|
testParams.set(testArg.get(), args.OptionArg());
|
2021-04-05 12:36:05 +08:00
|
|
|
break;
|
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_LOCALITY: {
|
2021-12-15 04:01:44 +08:00
|
|
|
Optional<std::string> localityKey = extractPrefixedArgument("--locality", args.OptionSyntax());
|
|
|
|
if (!localityKey.present()) {
|
|
|
|
fprintf(stderr, "ERROR: unable to parse locality key '%s'\n", args.OptionSyntax());
|
2019-08-06 08:08:42 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2019-04-06 07:06:30 +08:00
|
|
|
}
|
2021-12-15 04:01:44 +08:00
|
|
|
Standalone<StringRef> key = StringRef(localityKey.get());
|
|
|
|
std::transform(key.begin(), key.end(), mutateString(key), ::tolower);
|
|
|
|
localities.set(key, Standalone<StringRef>(std::string(args.OptionArg())));
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
2022-02-18 23:48:44 +08:00
|
|
|
case OPT_IP_TRUSTED_MASK: {
|
|
|
|
Optional<std::string> subnetKey = extractPrefixedArgument("--trusted-subnet", args.OptionSyntax());
|
|
|
|
if (!subnetKey.present()) {
|
|
|
|
fprintf(stderr, "ERROR: unable to parse locality key '%s'\n", args.OptionSyntax());
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
allowList.addTrustedSubnet(args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_VERSION:
|
|
|
|
printVersion();
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
break;
|
2020-09-11 02:06:56 +08:00
|
|
|
case OPT_BUILD_FLAGS:
|
|
|
|
printBuildInformation();
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_NOBUFSTDOUT:
|
2020-08-19 05:18:50 +08:00
|
|
|
setvbuf(stdout, nullptr, _IONBF, 0);
|
|
|
|
setvbuf(stderr, nullptr, _IONBF, 0);
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
case OPT_BUFSTDOUTERR:
|
2020-08-19 05:18:50 +08:00
|
|
|
setvbuf(stdout, nullptr, _IOFBF, BUFSIZ);
|
|
|
|
setvbuf(stderr, nullptr, _IOFBF, BUFSIZ);
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
case OPT_ROLE:
|
|
|
|
sRole = args.OptionArg();
|
|
|
|
if (!strcmp(sRole, "fdbd"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::FDBD;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "simulation"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::Simulation;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "test"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::Test;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "multitest"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::MultiTester;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "skiplisttest"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::SkipListTest;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "search"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::SearchMutations;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "dsltest"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::DSLTest;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "versionedmaptest"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::VersionedMapTest;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "createtemplatedb"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::CreateTemplateDatabase;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "networktestclient"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::NetworkTestClient;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "networktestserver"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::NetworkTestServer;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "restore"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::Restore;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "kvfileintegritycheck"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::KVFileIntegrityCheck;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "kvfilegeneratesums"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::KVFileGenerateIOLogChecksums;
|
2022-01-26 13:51:22 +08:00
|
|
|
else if (!strcmp(sRole, "kvfiledump"))
|
|
|
|
role = ServerRole::KVFileDump;
|
2019-08-06 08:08:42 +08:00
|
|
|
else if (!strcmp(sRole, "consistencycheck"))
|
2020-11-15 15:06:46 +08:00
|
|
|
role = ServerRole::ConsistencyCheck;
|
2021-04-05 12:36:05 +08:00
|
|
|
else if (!strcmp(sRole, "unittests"))
|
|
|
|
role = ServerRole::UnitTests;
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
else if (!strcmp(sRole, "flowprocess"))
|
|
|
|
role = ServerRole::FlowProcess;
|
2022-07-26 13:12:28 +08:00
|
|
|
else if (!strcmp(sRole, "changeclusterkey"))
|
|
|
|
role = ServerRole::ChangeClusterKey;
|
2019-08-06 08:08:42 +08:00
|
|
|
else {
|
|
|
|
fprintf(stderr, "ERROR: Unknown role `%s'\n", sRole);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
case OPT_PUBLICADDR:
|
|
|
|
argStr = args.OptionArg();
|
|
|
|
boost::split(tmpStrings, argStr, [](char c) { return c == ','; });
|
|
|
|
publicAddressStrs.insert(publicAddressStrs.end(), tmpStrings.begin(), tmpStrings.end());
|
|
|
|
break;
|
|
|
|
case OPT_LISTEN:
|
|
|
|
argStr = args.OptionArg();
|
|
|
|
boost::split(tmpStrings, argStr, [](char c) { return c == ','; });
|
|
|
|
listenAddressStrs.insert(listenAddressStrs.end(), tmpStrings.begin(), tmpStrings.end());
|
|
|
|
break;
|
|
|
|
case OPT_CONNFILE:
|
|
|
|
connFile = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_LOGGROUP:
|
|
|
|
logGroup = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_SEEDCONNFILE:
|
|
|
|
seedConnFile = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_SEEDCONNSTRING:
|
|
|
|
seedConnString = args.OptionArg();
|
|
|
|
break;
|
|
|
|
#ifdef __linux__
|
|
|
|
case OPT_FILESYSTEM: {
|
|
|
|
fileSystemPath = args.OptionArg();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_PROFILER_RSS_SIZE: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
char* end;
|
|
|
|
rsssize = strtoull(a, &end, 10);
|
|
|
|
if (*end) {
|
|
|
|
fprintf(stderr, "ERROR: Unrecognized memory size `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
case OPT_DATAFOLDER:
|
|
|
|
dataFolder = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_LOGFOLDER:
|
|
|
|
logFolder = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_NETWORKIMPL: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
if (!strcmp(a, "net2"))
|
|
|
|
useNet2 = true;
|
|
|
|
else if (!strcmp(a, "net2-threadpool")) {
|
|
|
|
useNet2 = true;
|
|
|
|
useThreadPool = true;
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "ERROR: Unknown network implementation `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_TRACECLOCK: {
|
|
|
|
const char* a = args.OptionArg();
|
2020-01-17 10:37:47 +08:00
|
|
|
if (!strcmp(a, "realtime"))
|
|
|
|
g_trace_clock.store(TRACE_CLOCK_REALTIME);
|
|
|
|
else if (!strcmp(a, "now"))
|
|
|
|
g_trace_clock.store(TRACE_CLOCK_NOW);
|
2019-08-06 08:08:42 +08:00
|
|
|
else {
|
|
|
|
fprintf(stderr, "ERROR: Unknown clock source `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_NUMTESTERS: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
if (!sscanf(a, "%d", &minTesterCount)) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse numtesters `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_ROLLSIZE: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
ti = parse_with_suffix(a);
|
|
|
|
if (!ti.present()) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse logsize `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
rollsize = ti.get();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_MAXLOGSSIZE: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
ti = parse_with_suffix(a);
|
|
|
|
if (!ti.present()) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse maxlogssize `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
maxLogsSize = ti.get();
|
|
|
|
maxLogsSizeSet = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_MAXLOGS: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
char* end;
|
|
|
|
maxLogs = strtoull(a, &end, 10);
|
|
|
|
if (*end) {
|
|
|
|
fprintf(stderr, "ERROR: Unrecognized maximum number of logs `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2019-04-26 03:49:14 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
maxLogsSet = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
|
|
case OPT_PARENTPID: {
|
|
|
|
auto pid_str = args.OptionArg();
|
|
|
|
int parent_pid = atoi(pid_str);
|
|
|
|
auto pHandle = OpenProcess(SYNCHRONIZE, FALSE, parent_pid);
|
|
|
|
if (!pHandle) {
|
|
|
|
TraceEvent("ParentProcessOpenError").GetLastError();
|
|
|
|
fprintf(stderr, "Could not open parent process at pid %d (error %d)", parent_pid, GetLastError());
|
|
|
|
throw platform_error();
|
|
|
|
}
|
2022-01-26 03:21:46 +08:00
|
|
|
startThread(&parentWatcher, pHandle, 0, "fdb-parentwatch");
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_NEWCONSOLE:
|
|
|
|
FreeConsole();
|
|
|
|
AllocConsole();
|
|
|
|
freopen("CONIN$", "rb", stdin);
|
|
|
|
freopen("CONOUT$", "wb", stdout);
|
|
|
|
freopen("CONOUT$", "wb", stderr);
|
|
|
|
break;
|
|
|
|
case OPT_NOBOX:
|
|
|
|
SetErrorMode(SetErrorMode(0) | SEM_NOGPFAULTERRORBOX);
|
|
|
|
break;
|
|
|
|
#else
|
|
|
|
case OPT_PARENTPID: {
|
|
|
|
auto pid_str = args.OptionArg();
|
|
|
|
int* parent_pid = new (int);
|
|
|
|
*parent_pid = atoi(pid_str);
|
2022-01-26 03:21:46 +08:00
|
|
|
startThread(&parentWatcher, parent_pid, 0, "fdb-parentwatch");
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
2020-07-08 00:06:13 +08:00
|
|
|
case OPT_TRACER: {
|
|
|
|
std::string arg = args.OptionArg();
|
|
|
|
std::string tracer;
|
|
|
|
std::transform(arg.begin(), arg.end(), std::back_inserter(tracer), [](char c) { return tolower(c); });
|
|
|
|
if (tracer == "none" || tracer == "disabled") {
|
|
|
|
openTracer(TracerType::DISABLED);
|
|
|
|
} else if (tracer == "logfile" || tracer == "file" || tracer == "log_file") {
|
|
|
|
openTracer(TracerType::LOG_FILE);
|
2020-11-14 07:50:20 +08:00
|
|
|
} else if (tracer == "network_lossy") {
|
|
|
|
openTracer(TracerType::NETWORK_LOSSY);
|
2020-07-08 00:06:13 +08:00
|
|
|
} else {
|
|
|
|
fprintf(stderr, "ERROR: Unknown or unsupported tracer: `%s'", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_TESTFILE:
|
|
|
|
testFile = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_KVFILE:
|
|
|
|
kvFile = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_RESTARTING:
|
|
|
|
restarting = true;
|
|
|
|
break;
|
|
|
|
case OPT_RANDOMSEED: {
|
|
|
|
char* end;
|
|
|
|
randomSeed = (uint32_t)strtoul(args.OptionArg(), &end, 0);
|
|
|
|
if (*end) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse random seed `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_MACHINEID: {
|
|
|
|
zoneId = std::string(args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_DCID: {
|
|
|
|
dcId = std::string(args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_MACHINE_CLASS:
|
|
|
|
sRole = args.OptionArg();
|
|
|
|
processClass = ProcessClass(sRole, ProcessClass::CommandLineSource);
|
|
|
|
if (processClass == ProcessClass::InvalidClass) {
|
|
|
|
fprintf(stderr, "ERROR: Unknown machine class `%s'\n", sRole);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
case OPT_KEY:
|
|
|
|
targetKey = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_MEMLIMIT:
|
|
|
|
ti = parse_with_suffix(args.OptionArg(), "MiB");
|
|
|
|
if (!ti.present()) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse memory limit from `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
memLimit = ti.get();
|
|
|
|
break;
|
2022-04-07 11:06:24 +08:00
|
|
|
case OPT_VMEMLIMIT:
|
|
|
|
ti = parse_with_suffix(args.OptionArg(), "MiB");
|
|
|
|
if (!ti.present()) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse virtual memory limit from `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
virtualMemLimit = ti.get();
|
|
|
|
break;
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_STORAGEMEMLIMIT:
|
|
|
|
ti = parse_with_suffix(args.OptionArg(), "MB");
|
|
|
|
if (!ti.present()) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse storage memory limit from `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
storageMemLimit = ti.get();
|
|
|
|
break;
|
|
|
|
case OPT_CACHEMEMLIMIT:
|
|
|
|
ti = parse_with_suffix(args.OptionArg(), "MiB");
|
|
|
|
if (!ti.present()) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse cache memory limit from `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
// SOMEDAY: ideally we'd have some better way to express that a knob should be elevated to formal
|
|
|
|
// parameter
|
2021-05-11 07:32:02 +08:00
|
|
|
knobs.emplace_back(
|
2019-08-06 08:08:42 +08:00
|
|
|
"page_cache_4k",
|
2022-02-26 05:28:29 +08:00
|
|
|
format("%lld", ti.get() / 4096 * 4096)); // The cache holds 4K pages, so we can truncate this to the
|
|
|
|
// next smaller multiple of 4K.
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
|
|
|
case OPT_BUGGIFY:
|
|
|
|
if (!strcmp(args.OptionArg(), "on"))
|
|
|
|
buggifyEnabled = true;
|
|
|
|
else if (!strcmp(args.OptionArg(), "off"))
|
|
|
|
buggifyEnabled = false;
|
|
|
|
else {
|
|
|
|
fprintf(stderr, "ERROR: Unknown buggify state `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
2021-07-24 07:28:20 +08:00
|
|
|
case OPT_FAULT_INJECTION:
|
|
|
|
if (!strcmp(args.OptionArg(), "on"))
|
|
|
|
faultInjectionEnabled = true;
|
|
|
|
else if (!strcmp(args.OptionArg(), "off"))
|
|
|
|
faultInjectionEnabled = false;
|
|
|
|
else {
|
|
|
|
fprintf(stderr, "ERROR: Unknown fault injection state `%s'\n", args.OptionArg());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
2019-08-06 08:08:42 +08:00
|
|
|
case OPT_CRASHONERROR:
|
|
|
|
g_crashOnError = true;
|
|
|
|
break;
|
|
|
|
case OPT_TESTSERVERS:
|
|
|
|
testServersStr = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_TEST_ON_SERVERS:
|
|
|
|
testOnServers = true;
|
|
|
|
break;
|
|
|
|
case OPT_METRICSCONNFILE:
|
|
|
|
metricsConnFile = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_METRICSPREFIX:
|
|
|
|
metricsPrefix = args.OptionArg();
|
|
|
|
break;
|
|
|
|
case OPT_IO_TRUST_SECONDS: {
|
|
|
|
const char* a = args.OptionArg();
|
|
|
|
if (!sscanf(a, "%lf", &fileIoTimeout)) {
|
|
|
|
fprintf(stderr, "ERROR: Could not parse io_trust_seconds `%s'\n", a);
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_IO_TRUST_WARN_ONLY:
|
|
|
|
fileIoWarnOnly = true;
|
|
|
|
break;
|
|
|
|
case OPT_TRACE_FORMAT:
|
|
|
|
if (!selectTraceFormatter(args.OptionArg())) {
|
|
|
|
fprintf(stderr, "WARNING: Unrecognized trace format `%s'\n", args.OptionArg());
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case OPT_WHITELIST_BINPATH:
|
|
|
|
whitelistBinPaths = args.OptionArg();
|
|
|
|
break;
|
2020-01-29 09:53:41 +08:00
|
|
|
case OPT_BLOB_CREDENTIAL_FILE:
|
|
|
|
// Add blob credential following backup agent example
|
|
|
|
blobCredentials.push_back(args.OptionArg());
|
|
|
|
printf("blob credential file:%s\n", blobCredentials.back().c_str());
|
|
|
|
|
|
|
|
blobCredsFromENV = getenv("FDB_BLOB_CREDENTIALS");
|
|
|
|
if (blobCredsFromENV != nullptr) {
|
|
|
|
fprintf(stderr, "[WARNING] Set blob credetial via env variable is not tested yet\n");
|
|
|
|
TraceEvent(SevError, "FastRestoreGetBlobCredentialFile")
|
|
|
|
.detail("Reason", "Set blob credetial via env variable is not tested yet");
|
|
|
|
StringRef t((uint8_t*)blobCredsFromENV, strlen(blobCredsFromENV));
|
|
|
|
do {
|
|
|
|
StringRef file = t.eat(":");
|
|
|
|
if (file.size() != 0) {
|
|
|
|
blobCredentials.push_back(file.toString());
|
|
|
|
}
|
|
|
|
} while (t.size() != 0);
|
|
|
|
}
|
|
|
|
break;
|
2021-05-13 01:12:37 +08:00
|
|
|
case OPT_CONFIG_PATH:
|
|
|
|
configPath = args.OptionArg();
|
2021-05-12 09:23:33 +08:00
|
|
|
break;
|
2021-05-18 10:14:32 +08:00
|
|
|
case OPT_USE_TEST_CONFIG_DB:
|
2021-08-07 14:18:10 +08:00
|
|
|
configDBType = ConfigDBType::SIMPLE;
|
2021-05-18 10:14:32 +08:00
|
|
|
break;
|
2022-07-28 05:02:01 +08:00
|
|
|
case OPT_NO_CONFIG_DB:
|
|
|
|
configDBType = ConfigDBType::DISABLED;
|
|
|
|
break;
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
case OPT_FLOW_PROCESS_NAME:
|
|
|
|
flowProcessName = args.OptionArg();
|
|
|
|
std::cout << flowProcessName << std::endl;
|
|
|
|
break;
|
|
|
|
case OPT_FLOW_PROCESS_ENDPOINT: {
|
|
|
|
std::vector<std::string> strings;
|
|
|
|
std::cout << args.OptionArg() << std::endl;
|
|
|
|
boost::split(strings, args.OptionArg(), [](char c) { return c == ','; });
|
|
|
|
for (auto& str : strings) {
|
|
|
|
std::cout << str << " ";
|
|
|
|
}
|
|
|
|
std::cout << "\n";
|
|
|
|
if (strings.size() != 3) {
|
|
|
|
std::cerr << "Invalid argument, expected 3 elements in --process-endpoint got " << strings.size()
|
|
|
|
<< std::endl;
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
auto addr = NetworkAddress::parse(strings[0]);
|
|
|
|
uint64_t fst = std::stoul(strings[1]);
|
|
|
|
uint64_t snd = std::stoul(strings[2]);
|
|
|
|
UID token(fst, snd);
|
|
|
|
NetworkAddressList l;
|
|
|
|
l.address = addr;
|
|
|
|
flowProcessEndpoint = Endpoint(l, token);
|
|
|
|
std::cout << "flowProcessEndpoint: " << flowProcessEndpoint.getPrimaryAddress().toString()
|
|
|
|
<< ", token: " << flowProcessEndpoint.token.toString() << "\n";
|
|
|
|
} catch (Error& e) {
|
|
|
|
std::cerr << "Could not parse network address " << strings[0] << std::endl;
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
} catch (std::exception& e) {
|
|
|
|
std::cerr << "Could not parse token " << strings[1] << "," << strings[2] << std::endl;
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2021-11-24 04:10:20 +08:00
|
|
|
case OPT_PRINT_SIMTIME:
|
|
|
|
printSimTime = true;
|
|
|
|
break;
|
2020-01-29 09:53:41 +08:00
|
|
|
|
2020-03-07 10:22:46 +08:00
|
|
|
case TLSConfig::OPT_TLS_PLUGIN:
|
2019-08-06 08:08:42 +08:00
|
|
|
args.OptionArg();
|
|
|
|
break;
|
2020-03-07 10:22:46 +08:00
|
|
|
case TLSConfig::OPT_TLS_CERTIFICATES:
|
|
|
|
tlsConfig.setCertificatePath(args.OptionArg());
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
2020-03-07 10:22:46 +08:00
|
|
|
case TLSConfig::OPT_TLS_PASSWORD:
|
|
|
|
tlsConfig.setPassword(args.OptionArg());
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
2020-03-07 10:22:46 +08:00
|
|
|
case TLSConfig::OPT_TLS_CA_FILE:
|
|
|
|
tlsConfig.setCAPath(args.OptionArg());
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
2020-03-07 10:22:46 +08:00
|
|
|
case TLSConfig::OPT_TLS_KEY:
|
|
|
|
tlsConfig.setKeyPath(args.OptionArg());
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
2020-03-07 10:22:46 +08:00
|
|
|
case TLSConfig::OPT_TLS_VERIFY_PEERS:
|
|
|
|
tlsConfig.addVerifyPeers(args.OptionArg());
|
2019-08-06 08:08:42 +08:00
|
|
|
break;
|
2022-05-08 04:18:35 +08:00
|
|
|
case OPT_KMS_CONN_DISCOVERY_URL_FILE: {
|
2022-10-18 06:13:21 +08:00
|
|
|
knobs.emplace_back("rest_kms_connector_discover_kms_url_file", args.OptionArg());
|
2022-05-08 04:18:35 +08:00
|
|
|
break;
|
|
|
|
}
|
2022-08-26 01:00:46 +08:00
|
|
|
case OPT_KMS_CONNECTOR_TYPE: {
|
|
|
|
knobs.emplace_back("kms_connector_type", args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
2023-03-07 08:06:03 +08:00
|
|
|
case OPT_KMS_REST_ALLOW_NOT_SECURE_CONECTION: {
|
|
|
|
TraceEvent("RESTKmsConnAllowNotSecureConnection");
|
2023-03-09 09:37:39 +08:00
|
|
|
knobs.emplace_back("rest_kms_allow_not_secure_connection", "true");
|
2023-03-07 08:06:03 +08:00
|
|
|
break;
|
|
|
|
}
|
2022-05-08 04:18:35 +08:00
|
|
|
case OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS: {
|
|
|
|
knobs.emplace_back("rest_kms_connector_validation_token_details", args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT: {
|
|
|
|
knobs.emplace_back("rest_kms_connector_get_encryption_keys_endpoint", args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
2023-01-10 02:55:53 +08:00
|
|
|
case OPT_KMS_CONN_GET_LATEST_ENCRYPTION_KEYS_ENDPOINT: {
|
|
|
|
knobs.emplace_back("rest_kms_connector_get_latest_encryption_keys_endpoint", args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
2022-10-15 06:49:00 +08:00
|
|
|
case OPT_KMS_CONN_GET_BLOB_METADATA_ENDPOINT: {
|
|
|
|
knobs.emplace_back("rest_kms_connector_get_blob_metadata_endpoint", args.OptionArg());
|
|
|
|
break;
|
|
|
|
}
|
2022-07-26 13:12:28 +08:00
|
|
|
case OPT_NEW_CLUSTER_KEY: {
|
|
|
|
newClusterKey = args.OptionArg();
|
|
|
|
try {
|
|
|
|
ClusterConnectionString ccs;
|
|
|
|
// make sure the new cluster key is in valid format
|
|
|
|
ccs.parseKey(newClusterKey);
|
|
|
|
} catch (Error& e) {
|
|
|
|
std::cerr << "Invalid cluster key(description:id) '" << newClusterKey << "' from --new-cluster-key"
|
|
|
|
<< std::endl;
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2022-07-29 01:13:58 +08:00
|
|
|
case OPT_AUTHZ_PUBLIC_KEY_FILE: {
|
|
|
|
authzPublicKeyFile = args.OptionArg();
|
|
|
|
break;
|
|
|
|
}
|
2022-08-08 23:29:49 +08:00
|
|
|
case OPT_USE_FUTURE_PROTOCOL_VERSION: {
|
|
|
|
if (!strcmp(args.OptionArg(), "true")) {
|
|
|
|
::useFutureProtocolVersion();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-04 06:10:04 +08:00
|
|
|
try {
|
|
|
|
ProfilerConfig::instance().reset(profilerConfig);
|
|
|
|
} catch (ConfigError& e) {
|
|
|
|
printf("Error seting up profiler: %s", e.description.c_str());
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
if (seedConnString.length() && seedConnFile.length()) {
|
2019-08-06 08:08:42 +08:00
|
|
|
fprintf(
|
2021-12-15 01:59:14 +08:00
|
|
|
stderr, "%s\n", "--seed-cluster-file and --seed-connection-string may not both be specified at once.");
|
2019-08-06 08:08:42 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool seedSpecified = seedConnFile.length() || seedConnString.length();
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
if (seedSpecified && !connFile.length()) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"%s\n",
|
2021-12-15 01:59:14 +08:00
|
|
|
"If -seed-cluster-file or --seed-connection-string is specified, -C must be specified as well.");
|
2019-08-06 08:08:42 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
if (metricsConnFile == connFile)
|
|
|
|
metricsConnFile = "";
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
if (metricsConnFile != "" && metricsPrefix == "") {
|
2017-05-26 04:48:44 +08:00
|
|
|
fprintf(stderr, "If a metrics cluster file is specified, a metrics prefix is required.\n");
|
2019-08-06 08:08:42 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
bool autoPublicAddress =
|
|
|
|
std::any_of(publicAddressStrs.begin(), publicAddressStrs.end(), [](const std::string& addr) {
|
2022-09-20 02:35:58 +08:00
|
|
|
return StringRef(addr).startsWith("auto:"_sr);
|
2019-08-06 08:08:42 +08:00
|
|
|
});
|
2020-11-15 15:06:46 +08:00
|
|
|
if ((role != ServerRole::Simulation && role != ServerRole::CreateTemplateDatabase &&
|
2021-04-05 12:36:05 +08:00
|
|
|
role != ServerRole::KVFileIntegrityCheck && role != ServerRole::KVFileGenerateIOLogChecksums &&
|
2022-01-26 13:51:22 +08:00
|
|
|
role != ServerRole::KVFileDump && role != ServerRole::UnitTests) ||
|
2019-08-06 08:08:42 +08:00
|
|
|
autoPublicAddress) {
|
|
|
|
|
|
|
|
if (seedSpecified && !fileExists(connFile)) {
|
|
|
|
std::string connectionString = seedConnString.length() ? seedConnString : "";
|
|
|
|
ClusterConnectionString ccs;
|
|
|
|
if (seedConnFile.length()) {
|
2017-05-26 04:48:44 +08:00
|
|
|
try {
|
2019-08-06 08:08:42 +08:00
|
|
|
connectionString = readFileBytes(seedConnFile, MAX_CLUSTER_FILE_BYTES);
|
2017-05-26 04:48:44 +08:00
|
|
|
} catch (Error& e) {
|
2019-08-06 08:08:42 +08:00
|
|
|
fprintf(stderr,
|
|
|
|
"%s\n",
|
|
|
|
ClusterConnectionFile::getErrorString(std::make_pair(seedConnFile, false), e).c_str());
|
2017-05-26 04:48:44 +08:00
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
try {
|
|
|
|
ccs = ClusterConnectionString(connectionString);
|
|
|
|
} catch (Error& e) {
|
|
|
|
fprintf(stderr, "%s\n", ClusterConnectionString::getErrorString(connectionString, e).c_str());
|
|
|
|
throw;
|
|
|
|
}
|
2021-05-12 23:54:55 +08:00
|
|
|
connectionFile = makeReference<ClusterConnectionFile>(connFile, ccs);
|
2019-08-06 08:08:42 +08:00
|
|
|
} else {
|
|
|
|
std::pair<std::string, bool> resolvedClusterFile;
|
|
|
|
try {
|
|
|
|
resolvedClusterFile = ClusterConnectionFile::lookupClusterFileName(connFile);
|
2020-11-07 15:50:55 +08:00
|
|
|
connectionFile = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
|
2019-08-06 08:08:42 +08:00
|
|
|
} catch (Error& e) {
|
|
|
|
fprintf(stderr, "%s\n", ClusterConnectionFile::getErrorString(resolvedClusterFile, e).c_str());
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
// failmon?
|
|
|
|
}
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::Simulation) {
|
2019-08-06 08:08:42 +08:00
|
|
|
Optional<bool> buggifyOverride = checkBuggifyOverride(testFile);
|
|
|
|
if (buggifyOverride.present())
|
|
|
|
buggifyEnabled = buggifyOverride.get();
|
|
|
|
}
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::SearchMutations && !targetKey) {
|
2019-08-06 08:08:42 +08:00
|
|
|
fprintf(stderr, "ERROR: please specify a target key\n");
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::NetworkTestClient && !testServersStr.size()) {
|
2019-08-06 08:08:42 +08:00
|
|
|
fprintf(stderr, "ERROR: please specify --testservers\n");
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2022-07-26 13:12:28 +08:00
|
|
|
if (role == ServerRole::ChangeClusterKey) {
|
|
|
|
bool error = false;
|
|
|
|
if (!newClusterKey.size()) {
|
|
|
|
fprintf(stderr, "ERROR: please specify --new-cluster-key\n");
|
|
|
|
error = true;
|
|
|
|
} else if (connectionFile->getConnectionString().clusterKey() == newClusterKey) {
|
|
|
|
fprintf(stderr, "ERROR: the new cluster key is the same as the old one\n");
|
|
|
|
error = true;
|
|
|
|
}
|
|
|
|
if (error) {
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
// Interpret legacy "maxLogs" option in the most sensible and unsurprising way we can while eliminating its code
|
|
|
|
// path
|
|
|
|
if (maxLogsSet) {
|
|
|
|
if (maxLogsSizeSet) {
|
|
|
|
// This is the case where both options are set and we must deconflict.
|
|
|
|
auto maxLogsAsSize = maxLogs * rollsize;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
// If either was unlimited, then the safe option here is to take the larger one.
|
|
|
|
// This means that if one of the two options specified a limited amount of logging
|
|
|
|
// then the option that specified "unlimited" will be ignored.
|
|
|
|
if (maxLogsSize == 0 || maxLogs == 0)
|
|
|
|
maxLogsSize = std::max(maxLogsSize, maxLogsAsSize);
|
|
|
|
else
|
|
|
|
maxLogsSize = std::min(maxLogsSize, maxLogs * rollsize);
|
|
|
|
} else {
|
|
|
|
maxLogsSize = maxLogs * rollsize;
|
|
|
|
}
|
2017-09-16 08:55:01 +08:00
|
|
|
}
|
2020-11-14 02:00:38 +08:00
|
|
|
if (!zoneId.present() &&
|
|
|
|
!(localities.isPresent(LocalityData::keyZoneId) && localities.isPresent(LocalityData::keyMachineId))) {
|
|
|
|
machineId = getSharedMemoryMachineId().toString();
|
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
if (!localities.isPresent(LocalityData::keyZoneId))
|
|
|
|
localities.set(LocalityData::keyZoneId, zoneId.present() ? zoneId : machineId);
|
|
|
|
|
|
|
|
if (!localities.isPresent(LocalityData::keyMachineId))
|
|
|
|
localities.set(LocalityData::keyMachineId, zoneId.present() ? zoneId : machineId);
|
|
|
|
|
|
|
|
if (!localities.isPresent(LocalityData::keyDcId) && dcId.present())
|
|
|
|
localities.set(LocalityData::keyDcId, dcId);
|
|
|
|
}
|
|
|
|
};
|
2022-08-26 07:40:05 +08:00
|
|
|
|
|
|
|
// Returns true iff validation is successful
|
|
|
|
bool validateSimulationDataFiles(std::string const& dataFolder, bool isRestarting) {
|
|
|
|
std::vector<std::string> files = platform::listFiles(dataFolder);
|
|
|
|
if (!isRestarting) {
|
|
|
|
for (const auto& file : files) {
|
|
|
|
if (file != "restartInfo.ini" && file != getTestEncryptionFileName()) {
|
|
|
|
TraceEvent(SevError, "IncompatibleFileFound").detail("DataFolder", dataFolder).detail("FileName", file);
|
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: Data folder `%s' is non-empty; please use clean, fdb-only folder\n",
|
|
|
|
dataFolder.c_str());
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (isRestarting && files.empty()) {
|
|
|
|
TraceEvent(SevWarnAlways, "FileNotFound").detail("DataFolder", dataFolder);
|
|
|
|
printf("ERROR: Data folder `%s' is empty, but restarting option selected. Run Phase 1 test first\n",
|
|
|
|
dataFolder.c_str());
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
} // namespace
|
|
|
|
|
|
|
|
int main(int argc, char* argv[]) {
|
2021-06-04 06:10:04 +08:00
|
|
|
// TODO: Remove later, this is just to force the statics to be initialized
|
|
|
|
// otherwise the unit test won't run
|
2021-07-24 08:18:13 +08:00
|
|
|
#ifdef ENABLE_SAMPLING
|
2021-06-04 06:10:04 +08:00
|
|
|
ActorLineageSet _;
|
2021-07-24 08:18:13 +08:00
|
|
|
#endif
|
2019-08-06 08:08:42 +08:00
|
|
|
try {
|
|
|
|
platformInit();
|
|
|
|
|
|
|
|
#ifdef ALLOC_INSTRUMENTATION
|
|
|
|
g_extra_memory = new uint8_t[1000000];
|
|
|
|
#endif
|
|
|
|
registerCrashHandler();
|
|
|
|
|
|
|
|
// Set default of line buffering standard out and error
|
2020-08-19 05:18:50 +08:00
|
|
|
setvbuf(stdout, nullptr, _IOLBF, BUFSIZ);
|
|
|
|
setvbuf(stderr, nullptr, _IOLBF, BUFSIZ);
|
2019-08-06 08:08:42 +08:00
|
|
|
|
|
|
|
// Enables profiling on this thread (but does not start it)
|
|
|
|
registerThreadForProfiling();
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
// Windows needs a gentle nudge to format floats correctly
|
|
|
|
//_set_output_format(_TWO_DIGIT_EXPONENT);
|
|
|
|
#endif
|
|
|
|
|
2022-02-23 03:15:44 +08:00
|
|
|
auto opts = CLIOptions::parseArgs(argc, argv);
|
2019-08-06 08:08:42 +08:00
|
|
|
const auto role = opts.role;
|
|
|
|
|
2022-07-20 22:02:19 +08:00
|
|
|
if (role == ServerRole::Simulation) {
|
2020-11-15 15:06:46 +08:00
|
|
|
printf("Random seed is %u...\n", opts.randomSeed);
|
2022-07-20 22:02:19 +08:00
|
|
|
bindDeterministicRandomToOpenssl();
|
|
|
|
}
|
2019-08-06 08:08:42 +08:00
|
|
|
|
|
|
|
if (opts.zoneId.present())
|
|
|
|
printf("ZoneId set to %s, dcId to %s\n", printable(opts.zoneId).c_str(), printable(opts.dcId).c_str());
|
|
|
|
|
|
|
|
setThreadLocalDeterministicRandomSeed(opts.randomSeed);
|
|
|
|
|
|
|
|
enableBuggify(opts.buggifyEnabled, BuggifyType::General);
|
2021-07-24 07:28:20 +08:00
|
|
|
enableFaultInjection(opts.faultInjectionEnabled);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-06-10 13:33:00 +08:00
|
|
|
IKnobCollection::setGlobalKnobCollection(IKnobCollection::Type::SERVER,
|
2021-07-17 15:11:40 +08:00
|
|
|
Randomize::True,
|
|
|
|
role == ServerRole::Simulation ? IsSimulated::True
|
|
|
|
: IsSimulated::False);
|
2022-03-25 06:17:02 +08:00
|
|
|
auto& g_knobs = IKnobCollection::getMutableGlobalKnobCollection();
|
|
|
|
g_knobs.setKnob("log_directory", KnobValue::create(opts.logFolder));
|
|
|
|
g_knobs.setKnob("conn_file", KnobValue::create(opts.connFile));
|
2022-04-07 11:06:24 +08:00
|
|
|
if (role != ServerRole::Simulation && opts.memLimit > 0) {
|
|
|
|
g_knobs.setKnob("commit_batches_mem_bytes_hard_limit",
|
|
|
|
KnobValue::create(static_cast<int64_t>(opts.memLimit)));
|
2018-06-02 06:21:40 +08:00
|
|
|
}
|
2021-06-03 01:04:46 +08:00
|
|
|
|
2022-03-25 06:17:02 +08:00
|
|
|
IKnobCollection::setupKnobs(opts.knobs);
|
2022-04-07 11:06:24 +08:00
|
|
|
g_knobs.setKnob("server_mem_limit", KnobValue::create(static_cast<int64_t>(opts.memLimit)));
|
2020-04-02 04:59:06 +08:00
|
|
|
// Reinitialize knobs in order to update knobs that are dependent on explicitly set knobs
|
2022-03-25 06:17:02 +08:00
|
|
|
g_knobs.initialize(Randomize::True, role == ServerRole::Simulation ? IsSimulated::True : IsSimulated::False);
|
2020-04-02 04:59:06 +08:00
|
|
|
|
2022-06-01 07:04:28 +08:00
|
|
|
if (!SERVER_KNOBS->ALLOW_DANGEROUS_KNOBS) {
|
2022-07-14 06:00:19 +08:00
|
|
|
if (SERVER_KNOBS->REMOTE_KV_STORE) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR : explicitly setting REMOTE_KV_STORE is dangerous! set ALLOW_DANGEROUS_KNOBS to "
|
|
|
|
"proceed anyways\n");
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2022-06-01 07:04:28 +08:00
|
|
|
}
|
|
|
|
|
2019-05-13 14:50:04 +08:00
|
|
|
// evictionPolicyStringToEnum will throw an exception if the string is not recognized as a valid eviction policy
|
2020-11-22 04:55:03 +08:00
|
|
|
EvictablePageCache::evictionPolicyStringToEnum(FLOW_KNOBS->CACHE_EVICTION_POLICY);
|
2019-05-07 14:32:57 +08:00
|
|
|
|
2022-04-07 11:06:24 +08:00
|
|
|
if (opts.memLimit > 0 && opts.virtualMemLimit > 0 && opts.memLimit > opts.virtualMemLimit) {
|
|
|
|
fprintf(stderr, "ERROR : --memory-vsize has to be no less than --memory");
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (opts.memLimit > 0 && opts.memLimit <= FLOW_KNOBS->PAGE_CACHE_4K) {
|
2021-12-15 01:59:14 +08:00
|
|
|
fprintf(stderr, "ERROR: --memory has to be larger than --cache-memory\n");
|
2019-09-14 03:54:37 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::SkipListTest) {
|
2017-05-26 04:48:44 +08:00
|
|
|
skipListTest();
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
}
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::DSLTest) {
|
2017-05-26 04:48:44 +08:00
|
|
|
dsltest();
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
}
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::VersionedMapTest) {
|
2017-05-26 04:48:44 +08:00
|
|
|
versionedMapTest();
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize the thread pool
|
|
|
|
CoroThreadPool::init();
|
|
|
|
// Ordinarily, this is done when the network is run. However, network thread should be set before TraceEvents
|
|
|
|
// are logged. This thread will eventually run the network, so call it now.
|
2018-06-21 00:21:23 +08:00
|
|
|
TraceEvent::setNetworkThread();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2018-10-23 08:57:09 +08:00
|
|
|
std::vector<Future<Void>> listenErrors;
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::Simulation || role == ServerRole::CreateTemplateDatabase) {
|
2017-05-26 04:48:44 +08:00
|
|
|
// startOldSimulator();
|
2022-02-23 03:15:44 +08:00
|
|
|
opts.buildNetwork(argv[0]);
|
2021-11-24 04:10:20 +08:00
|
|
|
startNewSimulator(opts.printSimTime);
|
2023-02-02 17:00:51 +08:00
|
|
|
openTraceFile({}, opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
2021-03-03 02:35:37 +08:00
|
|
|
openTracer(TracerType(deterministicRandom()->randomInt(static_cast<int>(TracerType::DISABLED),
|
|
|
|
static_cast<int>(TracerType::SIM_END))));
|
2017-05-26 04:48:44 +08:00
|
|
|
} else {
|
2020-03-07 10:22:46 +08:00
|
|
|
g_network = newNet2(opts.tlsConfig, opts.useThreadPool, true);
|
2020-04-20 17:53:07 +08:00
|
|
|
g_network->addStopCallback(Net2FileSystem::stop);
|
2022-02-18 23:48:44 +08:00
|
|
|
FlowTransport::createInstance(false, 1, WLTOKEN_RESERVED_COUNT, &opts.allowList);
|
2022-04-08 04:40:28 +08:00
|
|
|
opts.buildNetwork(argv[0]);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
const bool expectsPublicAddress = (role == ServerRole::FDBD || role == ServerRole::NetworkTestServer ||
|
|
|
|
role == ServerRole::Restore || role == ServerRole::FlowProcess);
|
2019-08-06 08:08:42 +08:00
|
|
|
if (opts.publicAddressStrs.empty()) {
|
2019-03-19 02:06:13 +08:00
|
|
|
if (expectsPublicAddress) {
|
2021-12-15 01:59:14 +08:00
|
|
|
fprintf(stderr, "ERROR: The -p or --public-address option is required\n");
|
2019-03-19 02:06:13 +08:00
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
openTraceFile(
|
|
|
|
opts.publicAddresses.address, opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
2020-02-27 10:53:06 +08:00
|
|
|
g_network->initTLS();
|
2022-07-29 01:13:58 +08:00
|
|
|
if (!opts.authzPublicKeyFile.empty()) {
|
2022-08-17 20:57:05 +08:00
|
|
|
try {
|
|
|
|
FlowTransport::transport().loadPublicKeyFile(opts.authzPublicKeyFile);
|
|
|
|
} catch (Error& e) {
|
|
|
|
TraceEvent("AuthzPublicKeySetLoadError").error(e);
|
|
|
|
}
|
2022-07-29 01:13:58 +08:00
|
|
|
FlowTransport::transport().watchPublicKeyFile(opts.authzPublicKeyFile);
|
2022-08-17 20:57:05 +08:00
|
|
|
} else {
|
|
|
|
TraceEvent(SevInfo, "AuthzPublicKeyFileNotSet");
|
2022-07-29 01:13:58 +08:00
|
|
|
}
|
2023-02-28 04:51:13 +08:00
|
|
|
if (FLOW_KNOBS->ALLOW_TOKENLESS_TENANT_ACCESS)
|
|
|
|
TraceEvent(SevWarnAlways, "AuthzTokenlessAccessEnabled");
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-03-19 02:06:13 +08:00
|
|
|
if (expectsPublicAddress) {
|
2019-08-06 08:08:42 +08:00
|
|
|
for (int ii = 0; ii < (opts.publicAddresses.secondaryAddress.present() ? 2 : 1); ++ii) {
|
|
|
|
const NetworkAddress& publicAddress =
|
|
|
|
ii == 0 ? opts.publicAddresses.address : opts.publicAddresses.secondaryAddress.get();
|
|
|
|
const NetworkAddress& listenAddress =
|
|
|
|
ii == 0 ? opts.listenAddresses.address : opts.listenAddresses.secondaryAddress.get();
|
2018-10-31 04:44:37 +08:00
|
|
|
try {
|
|
|
|
const Future<Void>& errorF = FlowTransport::transport().bind(publicAddress, listenAddress);
|
|
|
|
listenErrors.push_back(errorF);
|
|
|
|
if (errorF.isReady())
|
|
|
|
errorF.get();
|
|
|
|
} catch (Error& e) {
|
|
|
|
TraceEvent("BindError").error(e);
|
|
|
|
fprintf(stderr,
|
|
|
|
"Error initializing networking with public address %s and listen address %s (%s)\n",
|
|
|
|
publicAddress.toString().c_str(),
|
|
|
|
listenAddress.toString().c_str(),
|
|
|
|
e.what());
|
|
|
|
printHelpTeaser(argv[0]);
|
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use a negative ioTimeout to indicate warn-only
|
2019-08-06 08:08:42 +08:00
|
|
|
Net2FileSystem::newFileSystem(opts.fileIoWarnOnly ? -opts.fileIoTimeout : opts.fileIoTimeout,
|
|
|
|
opts.fileSystemPath);
|
2017-05-26 04:48:44 +08:00
|
|
|
g_network->initMetrics();
|
|
|
|
FlowTransport::transport().initMetrics();
|
|
|
|
initTraceEventMetrics();
|
|
|
|
}
|
|
|
|
|
|
|
|
double start = timer(), startNow = now();
|
|
|
|
|
|
|
|
std::string cwd = "<unknown>";
|
|
|
|
try {
|
|
|
|
cwd = platform::getWorkingDirectory();
|
|
|
|
} catch (Error& e) {
|
|
|
|
// Allow for platform error by rethrowing all _other_ errors
|
|
|
|
if (e.code() != error_code_platform_error)
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
|
2022-08-03 02:29:37 +08:00
|
|
|
std::string environmentKnobOptions;
|
2022-08-29 20:15:03 +08:00
|
|
|
for (const std::string& knobOption : getEnvironmentKnobOptions()) {
|
2022-08-03 02:29:37 +08:00
|
|
|
environmentKnobOptions += knobOption + " ";
|
|
|
|
}
|
|
|
|
if (environmentKnobOptions.length()) {
|
|
|
|
environmentKnobOptions.pop_back();
|
|
|
|
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent("ProgramStart")
|
2019-08-06 08:08:42 +08:00
|
|
|
.setMaxEventLength(12000)
|
|
|
|
.detail("RandomSeed", opts.randomSeed)
|
2019-11-16 04:26:51 +08:00
|
|
|
.detail("SourceVersion", getSourceVersion())
|
2019-08-06 08:08:42 +08:00
|
|
|
.detail("Version", FDB_VT_VERSION)
|
|
|
|
.detail("PackageName", FDB_VT_PACKAGE_NAME)
|
|
|
|
.detail("FileSystem", opts.fileSystemPath)
|
|
|
|
.detail("DataFolder", opts.dataFolder)
|
|
|
|
.detail("WorkingDirectory", cwd)
|
2021-10-11 11:44:56 +08:00
|
|
|
.detail("ClusterFile", opts.connectionFile ? opts.connectionFile->toString() : "")
|
2019-08-06 08:08:42 +08:00
|
|
|
.detail("ConnectionString",
|
|
|
|
opts.connectionFile ? opts.connectionFile->getConnectionString().toString() : "")
|
2020-08-19 05:18:50 +08:00
|
|
|
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
|
2019-08-06 08:08:42 +08:00
|
|
|
.setMaxFieldLength(10000)
|
2022-08-03 02:29:37 +08:00
|
|
|
.detail("EnvironmentKnobOptions", environmentKnobOptions.length() ? environmentKnobOptions : "none")
|
2019-08-06 08:08:42 +08:00
|
|
|
.detail("CommandLine", opts.commandLine)
|
|
|
|
.setMaxFieldLength(0)
|
|
|
|
.detail("BuggifyEnabled", opts.buggifyEnabled)
|
2021-07-24 07:28:20 +08:00
|
|
|
.detail("FaultInjectionEnabled", opts.faultInjectionEnabled)
|
2019-08-06 08:08:42 +08:00
|
|
|
.detail("MemoryLimit", opts.memLimit)
|
2022-04-07 11:06:24 +08:00
|
|
|
.detail("VirtualMemoryLimit", opts.virtualMemLimit)
|
2022-08-08 23:29:49 +08:00
|
|
|
.detail("ProtocolVersion", currentProtocolVersion())
|
2019-08-06 08:08:42 +08:00
|
|
|
.trackLatest("ProgramStart");
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
Error::init();
|
|
|
|
std::set_new_handler(&platform::outOfMemory);
|
2022-04-07 11:06:24 +08:00
|
|
|
Future<Void> memoryUsageMonitor = startMemoryUsageMonitor(opts.memLimit);
|
|
|
|
setMemoryQuota(opts.virtualMemLimit);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
Future<Optional<Void>> f;
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::Simulation) {
|
2019-08-06 08:08:42 +08:00
|
|
|
TraceEvent("Simulation").detail("TestFile", opts.testFile);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-11-13 09:03:41 +08:00
|
|
|
auto histogramReportActor = histogramReport();
|
|
|
|
|
2020-11-22 04:55:03 +08:00
|
|
|
CLIENT_KNOBS->trace();
|
|
|
|
FLOW_KNOBS->trace();
|
|
|
|
SERVER_KNOBS->trace();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-08-06 08:08:42 +08:00
|
|
|
auto dataFolder = opts.dataFolder.size() ? opts.dataFolder : "simfdb";
|
2017-05-26 04:48:44 +08:00
|
|
|
std::vector<std::string> directories = platform::listDirectories(dataFolder);
|
2022-11-05 02:26:44 +08:00
|
|
|
const std::set<std::string> allowedDirectories = { ".", "..", "backups", "unittests", "fdbblob" };
|
2021-07-18 18:06:04 +08:00
|
|
|
|
2021-07-19 02:08:27 +08:00
|
|
|
for (const auto& dir : directories) {
|
2021-07-18 18:06:04 +08:00
|
|
|
if (dir.size() != 32 && allowedDirectories.count(dir) == 0 && dir.find("snap") == std::string::npos) {
|
|
|
|
|
2019-09-10 01:21:16 +08:00
|
|
|
TraceEvent(SevError, "IncompatibleDirectoryFound")
|
|
|
|
.detail("DataFolder", dataFolder)
|
2021-07-18 18:06:04 +08:00
|
|
|
.detail("SuspiciousFile", dir);
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
fprintf(stderr,
|
|
|
|
"ERROR: Data folder `%s' had non fdb file `%s'; please use clean, fdb-only folder\n",
|
|
|
|
dataFolder.c_str(),
|
2021-07-18 18:06:04 +08:00
|
|
|
dir.c_str());
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
2021-07-18 18:06:04 +08:00
|
|
|
}
|
2022-08-26 07:40:05 +08:00
|
|
|
|
|
|
|
if (!validateSimulationDataFiles(dataFolder, opts.restarting)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
flushAndExit(FDB_EXIT_ERROR);
|
|
|
|
}
|
|
|
|
|
2019-03-26 09:31:08 +08:00
|
|
|
int isRestoring = 0;
|
2019-08-06 08:08:42 +08:00
|
|
|
if (!opts.restarting) {
|
2017-05-26 04:48:44 +08:00
|
|
|
platform::eraseDirectoryRecursive(dataFolder);
|
|
|
|
platform::createDirectory(dataFolder);
|
2019-03-21 07:51:14 +08:00
|
|
|
} else {
|
2019-03-11 22:31:44 +08:00
|
|
|
CSimpleIni ini;
|
|
|
|
ini.SetUnicode();
|
2019-05-07 09:57:26 +08:00
|
|
|
std::string absDataFolder = abspath(dataFolder);
|
|
|
|
ini.LoadFile(joinPath(absDataFolder, "restartInfo.ini").c_str());
|
2019-05-23 04:38:56 +08:00
|
|
|
int backupFailed = true;
|
2020-08-19 05:18:50 +08:00
|
|
|
const char* isRestoringStr = ini.GetValue("RESTORE", "isRestoring", nullptr);
|
2019-05-23 04:38:56 +08:00
|
|
|
if (isRestoringStr) {
|
|
|
|
isRestoring = atoi(isRestoringStr);
|
2020-08-19 05:18:50 +08:00
|
|
|
const char* backupFailedStr = ini.GetValue("RESTORE", "BackupFailed", nullptr);
|
2019-05-23 04:38:56 +08:00
|
|
|
if (isRestoring && backupFailedStr) {
|
|
|
|
backupFailed = atoi(backupFailedStr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (isRestoring && !backupFailed) {
|
2019-07-23 06:44:49 +08:00
|
|
|
std::vector<std::string> returnList;
|
|
|
|
std::string ext = "";
|
|
|
|
returnList = platform::listDirectories(absDataFolder);
|
|
|
|
std::string snapStr = ini.GetValue("RESTORE", "RestoreSnapUID");
|
|
|
|
|
|
|
|
TraceEvent("RestoringDataFolder").detail("DataFolder", absDataFolder);
|
|
|
|
TraceEvent("RestoreSnapUID").detail("UID", snapStr);
|
|
|
|
|
|
|
|
// delete all files (except fdb.cluster) in non-snap directories
|
|
|
|
for (const auto& dirEntry : returnList) {
|
|
|
|
if (dirEntry == "." || dirEntry == "..") {
|
|
|
|
continue;
|
2019-03-21 07:51:14 +08:00
|
|
|
}
|
2019-07-23 06:44:49 +08:00
|
|
|
if (dirEntry.find(snapStr) != std::string::npos) {
|
|
|
|
continue;
|
2019-03-21 07:51:14 +08:00
|
|
|
}
|
2019-03-11 22:31:44 +08:00
|
|
|
|
2019-07-23 06:44:49 +08:00
|
|
|
std::string childf = absDataFolder + "/" + dirEntry;
|
|
|
|
std::vector<std::string> returnFiles = platform::listFiles(childf, ext);
|
|
|
|
for (const auto& fileEntry : returnFiles) {
|
|
|
|
if (fileEntry != "fdb.cluster" && fileEntry != "fitness") {
|
|
|
|
TraceEvent("DeletingNonSnapfiles").detail("FileBeingDeleted", childf + "/" + fileEntry);
|
|
|
|
deleteFile(childf + "/" + fileEntry);
|
2019-03-21 07:51:14 +08:00
|
|
|
}
|
2019-03-11 22:31:44 +08:00
|
|
|
}
|
2019-07-23 06:44:49 +08:00
|
|
|
}
|
|
|
|
// cleanup unwanted and partial directories
|
|
|
|
for (const auto& dirEntry : returnList) {
|
|
|
|
if (dirEntry == "." || dirEntry == "..") {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
std::string dirSrc = absDataFolder + "/" + dirEntry;
|
|
|
|
// delete snap directories which are not part of restoreSnapUID
|
|
|
|
if (dirEntry.find(snapStr) == std::string::npos) {
|
|
|
|
if (dirEntry.find("snap") != std::string::npos) {
|
2019-05-07 09:57:26 +08:00
|
|
|
platform::eraseDirectoryRecursive(dirSrc);
|
2019-03-21 07:51:14 +08:00
|
|
|
}
|
2019-07-23 06:44:49 +08:00
|
|
|
continue;
|
2019-03-11 22:31:44 +08:00
|
|
|
}
|
2019-07-23 06:44:49 +08:00
|
|
|
// remove empty/partial snap directories
|
|
|
|
std::vector<std::string> childrenList = platform::listFiles(dirSrc);
|
|
|
|
if (childrenList.size() == 0) {
|
|
|
|
TraceEvent("RemovingEmptySnapDirectory").detail("DirBeingDeleted", dirSrc);
|
|
|
|
platform::eraseDirectoryRecursive(dirSrc);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// move snapshotted files to appropriate locations
|
|
|
|
for (const auto& dirEntry : returnList) {
|
|
|
|
if (dirEntry == "." || dirEntry == "..") {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
std::string dirSrc = absDataFolder + "/" + dirEntry;
|
|
|
|
std::string origDir = dirEntry.substr(0, 32);
|
|
|
|
std::string dirToMove = absDataFolder + "/" + origDir;
|
|
|
|
if ((dirEntry.find("snap") != std::string::npos) &&
|
|
|
|
(dirEntry.find("tlog") != std::string::npos)) {
|
|
|
|
// restore tlog files
|
|
|
|
restoreRoleFilesHelper(dirSrc, dirToMove, "log");
|
|
|
|
} else if ((dirEntry.find("snap") != std::string::npos) &&
|
|
|
|
(dirEntry.find("storage") != std::string::npos)) {
|
|
|
|
// restore storage files
|
|
|
|
restoreRoleFilesHelper(dirSrc, dirToMove, "storage");
|
|
|
|
} else if ((dirEntry.find("snap") != std::string::npos) &&
|
|
|
|
(dirEntry.find("coord") != std::string::npos)) {
|
|
|
|
// restore coordinator files
|
|
|
|
restoreRoleFilesHelper(dirSrc, dirToMove, "coordination");
|
2019-05-07 09:57:26 +08:00
|
|
|
}
|
2019-03-11 22:31:44 +08:00
|
|
|
}
|
|
|
|
}
|
2022-07-15 09:16:45 +08:00
|
|
|
g_knobs.setKnob("enable_blob_granule_compression",
|
|
|
|
KnobValue::create(ini.GetBoolValue("META", "enableBlobGranuleEncryption", false)));
|
2023-01-26 07:54:40 +08:00
|
|
|
g_knobs.setKnob("encrypt_header_auth_token_enabled",
|
|
|
|
KnobValue::create(ini.GetBoolValue("META", "encryptHeaderAuthTokenEnabled", false)));
|
|
|
|
g_knobs.setKnob("encrypt_header_auth_token_algo",
|
|
|
|
KnobValue::create((int)ini.GetLongValue(
|
|
|
|
"META", "encryptHeaderAuthTokenAlgo", FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ALGO)));
|
2023-02-17 11:01:59 +08:00
|
|
|
g_knobs.setKnob("enable_configurable_encryption",
|
|
|
|
KnobValue::create(ini.GetBoolValue("META",
|
|
|
|
"enableConfigurableEncryption",
|
|
|
|
CLIENT_KNOBS->ENABLE_CONFIGURABLE_ENCRYPTION)));
|
|
|
|
|
2023-02-15 01:57:08 +08:00
|
|
|
g_knobs.setKnob(
|
|
|
|
"shard_encode_location_metadata",
|
|
|
|
KnobValue::create(ini.GetBoolValue("META", "enableShardEncodeLocationMetadata", false)));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2020-02-22 11:09:16 +08:00
|
|
|
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
|
2022-09-15 08:10:49 +08:00
|
|
|
g_simulator->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::FDBD) {
|
2020-06-05 02:18:42 +08:00
|
|
|
// Update the global blob credential files list so that both fast
|
|
|
|
// restore workers and backup workers can access blob storage.
|
|
|
|
std::vector<std::string>* pFiles =
|
|
|
|
(std::vector<std::string>*)g_network->global(INetwork::enBlobCredentialFiles);
|
|
|
|
if (pFiles != nullptr) {
|
|
|
|
for (auto& f : opts.blobCredentials) {
|
|
|
|
pFiles->push_back(f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-29 09:53:41 +08:00
|
|
|
// Call fast restore for the class FastRestoreClass. This is a short-cut to run fast restore in circus
|
|
|
|
if (opts.processClass == ProcessClass::FastRestoreClass) {
|
|
|
|
printf("Run as fast restore worker\n");
|
2020-02-01 12:23:35 +08:00
|
|
|
ASSERT(opts.connectionFile);
|
|
|
|
auto dataFolder = opts.dataFolder;
|
|
|
|
if (!dataFolder.size())
|
|
|
|
dataFolder = format("fdb/%d/", opts.publicAddresses.address.port); // SOMEDAY: Better default
|
2020-01-29 09:53:41 +08:00
|
|
|
|
2021-09-17 08:42:34 +08:00
|
|
|
std::vector<Future<Void>> actors(listenErrors.begin(), listenErrors.end());
|
2020-02-04 03:11:31 +08:00
|
|
|
actors.push_back(restoreWorker(opts.connectionFile, opts.localities, dataFolder));
|
2020-02-01 12:23:35 +08:00
|
|
|
f = stopAfter(waitForAll(actors));
|
2020-05-08 06:06:59 +08:00
|
|
|
printf("Fast restore worker started\n");
|
2020-01-31 10:49:01 +08:00
|
|
|
g_network->run();
|
|
|
|
printf("g_network->run() done\n");
|
2020-01-29 09:53:41 +08:00
|
|
|
} else { // Call fdbd roles in conventional way
|
|
|
|
ASSERT(opts.connectionFile);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-04-15 05:02:24 +08:00
|
|
|
setupRunLoopProfiler();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-01-29 09:53:41 +08:00
|
|
|
auto dataFolder = opts.dataFolder;
|
|
|
|
if (!dataFolder.size())
|
|
|
|
dataFolder = format("fdb/%d/", opts.publicAddresses.address.port); // SOMEDAY: Better default
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-09-17 08:42:34 +08:00
|
|
|
std::vector<Future<Void>> actors(listenErrors.begin(), listenErrors.end());
|
2020-01-29 09:53:41 +08:00
|
|
|
actors.push_back(fdbd(opts.connectionFile,
|
|
|
|
opts.localities,
|
|
|
|
opts.processClass,
|
|
|
|
dataFolder,
|
|
|
|
dataFolder,
|
|
|
|
opts.storageMemLimit,
|
|
|
|
opts.metricsConnFile,
|
|
|
|
opts.metricsPrefix,
|
|
|
|
opts.rsssize,
|
2021-05-12 09:23:33 +08:00
|
|
|
opts.whitelistBinPaths,
|
2021-05-18 10:14:32 +08:00
|
|
|
opts.configPath,
|
|
|
|
opts.manualKnobOverrides,
|
2021-08-07 14:18:10 +08:00
|
|
|
opts.configDBType));
|
2020-11-25 01:41:36 +08:00
|
|
|
actors.push_back(histogramReport());
|
2020-01-29 09:53:41 +08:00
|
|
|
// actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement
|
|
|
|
|
|
|
|
f = stopAfter(waitForAll(actors));
|
2020-01-31 10:49:01 +08:00
|
|
|
g_network->run();
|
2020-01-29 09:53:41 +08:00
|
|
|
}
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::MultiTester) {
|
2020-09-25 05:14:55 +08:00
|
|
|
setupRunLoopProfiler();
|
2019-08-06 08:08:42 +08:00
|
|
|
f = stopAfter(runTests(opts.connectionFile,
|
|
|
|
TEST_TYPE_FROM_FILE,
|
|
|
|
opts.testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS,
|
|
|
|
opts.minTesterCount,
|
|
|
|
opts.testFile,
|
|
|
|
StringRef(),
|
|
|
|
opts.localities));
|
2017-05-26 04:48:44 +08:00
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::Test) {
|
2022-03-18 06:17:27 +08:00
|
|
|
TraceEvent("NonSimulationTest").detail("TestFile", opts.testFile);
|
2020-09-25 05:14:55 +08:00
|
|
|
setupRunLoopProfiler();
|
2023-01-11 08:37:54 +08:00
|
|
|
auto m =
|
|
|
|
startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId, opts.localities.dataHallId());
|
2019-08-06 08:08:42 +08:00
|
|
|
f = stopAfter(runTests(
|
|
|
|
opts.connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, opts.testFile, StringRef(), opts.localities));
|
2018-02-15 01:50:12 +08:00
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::ConsistencyCheck) {
|
2020-01-29 04:09:37 +08:00
|
|
|
setupRunLoopProfiler();
|
2018-02-15 01:50:12 +08:00
|
|
|
|
2023-01-11 08:37:54 +08:00
|
|
|
auto m =
|
|
|
|
startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId, opts.localities.dataHallId());
|
2019-08-06 08:08:42 +08:00
|
|
|
f = stopAfter(runTests(opts.connectionFile,
|
|
|
|
TEST_TYPE_CONSISTENCY_CHECK,
|
|
|
|
TEST_HERE,
|
|
|
|
1,
|
|
|
|
opts.testFile,
|
|
|
|
StringRef(),
|
|
|
|
opts.localities));
|
2017-05-26 04:48:44 +08:00
|
|
|
g_network->run();
|
2021-04-05 12:36:05 +08:00
|
|
|
} else if (role == ServerRole::UnitTests) {
|
|
|
|
setupRunLoopProfiler();
|
2023-01-11 08:37:54 +08:00
|
|
|
auto m =
|
|
|
|
startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId, opts.localities.dataHallId());
|
2021-04-06 17:36:10 +08:00
|
|
|
f = stopAfter(runTests(opts.connectionFile,
|
|
|
|
TEST_TYPE_UNIT_TESTS,
|
|
|
|
TEST_HERE,
|
|
|
|
1,
|
|
|
|
opts.testFile,
|
|
|
|
StringRef(),
|
|
|
|
opts.localities,
|
|
|
|
opts.testParams));
|
2021-04-05 12:36:05 +08:00
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::CreateTemplateDatabase) {
|
2017-05-26 04:48:44 +08:00
|
|
|
createTemplateDatabase();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::NetworkTestClient) {
|
2019-08-06 08:08:42 +08:00
|
|
|
f = stopAfter(networkTestClient(opts.testServersStr));
|
2017-05-26 04:48:44 +08:00
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::NetworkTestServer) {
|
2017-05-26 04:48:44 +08:00
|
|
|
f = stopAfter(networkTestServer());
|
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::Restore) {
|
2020-02-04 03:11:31 +08:00
|
|
|
f = stopAfter(restoreWorker(opts.connectionFile, opts.localities, opts.dataFolder));
|
2018-10-10 09:47:28 +08:00
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::KVFileIntegrityCheck) {
|
2019-08-06 08:08:42 +08:00
|
|
|
f = stopAfter(KVFileCheck(opts.kvFile, true));
|
2017-05-27 08:43:28 +08:00
|
|
|
g_network->run();
|
2020-11-15 15:06:46 +08:00
|
|
|
} else if (role == ServerRole::KVFileGenerateIOLogChecksums) {
|
2018-09-11 01:51:41 +08:00
|
|
|
Optional<Void> result;
|
|
|
|
try {
|
2019-08-06 08:08:42 +08:00
|
|
|
GenerateIOLogChecksumFile(opts.kvFile);
|
2018-09-11 01:51:41 +08:00
|
|
|
result = Void();
|
|
|
|
} catch (Error& e) {
|
|
|
|
fprintf(stderr, "Fatal Error: %s\n", e.what());
|
|
|
|
}
|
|
|
|
|
|
|
|
f = result;
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
} else if (role == ServerRole::FlowProcess) {
|
2022-08-24 15:40:38 +08:00
|
|
|
std::string traceFormat = getTraceFormatExtension();
|
|
|
|
// close and reopen trace file with the correct process listen address to name the file
|
|
|
|
closeTraceFile();
|
|
|
|
// writer is not shut down immediately, addref on it
|
|
|
|
disposeTraceFileWriter();
|
|
|
|
// use the same trace format as before
|
|
|
|
selectTraceFormatter(traceFormat);
|
|
|
|
// create the trace file with the correct process address
|
|
|
|
openTraceFile(
|
|
|
|
g_network->getLocalAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
2023-01-11 08:37:54 +08:00
|
|
|
auto m =
|
|
|
|
startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId, opts.localities.dataHallId());
|
2022-08-24 15:40:38 +08:00
|
|
|
TraceEvent(SevDebug, "StartingFlowProcess").detail("FlowProcessName", opts.flowProcessName);
|
2022-09-04 04:21:01 +08:00
|
|
|
#if defined(__linux__)
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
prctl(PR_SET_PDEATHSIG, SIGTERM);
|
|
|
|
if (getppid() == 1) /* parent already died before prctl */
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
2022-09-04 04:21:01 +08:00
|
|
|
#elif defined(__FreeBSD__)
|
|
|
|
const int sig = SIGTERM;
|
2022-09-04 17:58:35 +08:00
|
|
|
procctl(P_PID, 0, PROC_PDEATHSIG_CTL, (void*)&sig);
|
2022-09-04 04:21:01 +08:00
|
|
|
if (getppid() == 1) /* parent already died before procctl */
|
|
|
|
flushAndExit(FDB_EXIT_SUCCESS);
|
Remote ikvs debugging (#6465)
* initial structure for remote IKVS server
* moved struct to .h file, added new files to CMakeList
* happy path implementation, connection error when testing
* saved minor local change
* changed tracing to debug
* fixed onClosed and getError being called before init is finished
* fix spawn process bug, now use absolute path
* added server knob to set ikvs process port number
* added server knob for remote/local kv store
* implement simulator remote process spawning
* fixed bug for simulator timeout
* commit all changes
* removed print lines in trace
* added FlowProcess implementation by Markus
* initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child
* temporary fix for process factory throwing segfault on create
* specify public address in command
* change remote kv store knob to false for jenkins build
* made port 0 open random unused port
* change remote store knob to true for benchmark
* set listening port to randomly opened port
* added print lines for jenkins run open kv store timeout debug
* removed most tracing and print lines
* removed tutorial changes
* update handleIOErrors error handling to handle remote-ikvs cases
* Push all debugging changes
* A version where worker bug exists
* A version where restarting tests fail
* Use both the name and the port to determine the child process
* Remove unnecessary update on local address
* Disable remote-kvs for DiskFailureCycle test
* A version where restarting stuck
* A version where most restarting tests green
* Reset connection with child process explicitly
* Remove change on unnecessary files
* Unify flags from _ to -
* fix merging unexpected changes
* fix trac.error to .errorUnsuppressed
* Add license header
* Remove unnecessary header in FlowProcess.actor.cpp
* Fix Windows build
* Fix Windows build, add missing ;
* Fix a stupid bug caused by code dropped by code merging
* Disable remote kvs by default
* Pass the conn_file path to the flow process, though not needed, but the buildNetwork is difficult to tune
* serialization change on readrange
* Update traces
* Refactor the RemoteIKVS interface
* Format files
* Update sim2 interface to not clog connections between parent and child processes in simulation
* Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled
* Add comments, format files
* Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections
* Commit the IConnection interface change, forgot in previous commit
* Fix the issue that onClosed request is cancelled by ActorCollection
* Enable the remote kv store knob
* Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process
* Fix the bug where one process starts storage server more than once
* Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally
* Remove unreachable code path and add comments
* Clang format the code
* Fix a simple wait error
* Clang format after merging the main branch
* Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false
* Disable remote kvs for PhysicalShardMove which is for RocksDB
* Cleanup #include orders, remove debugging traces
* Revert the reorder in fdbserver.actor.cpp, which fails the gcc build
Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
2022-04-01 08:08:59 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
if (opts.flowProcessName == "KeyValueStoreProcess") {
|
|
|
|
ProcessFactory<KeyValueStoreProcess>(opts.flowProcessName.c_str());
|
|
|
|
}
|
|
|
|
f = stopAfter(runFlowProcess(opts.flowProcessName, opts.flowProcessEndpoint));
|
|
|
|
g_network->run();
|
2022-01-26 13:51:22 +08:00
|
|
|
} else if (role == ServerRole::KVFileDump) {
|
|
|
|
f = stopAfter(KVFileDump(opts.kvFile));
|
|
|
|
g_network->run();
|
2022-07-26 13:12:28 +08:00
|
|
|
} else if (role == ServerRole::ChangeClusterKey) {
|
|
|
|
Key newClusterKey(opts.newClusterKey);
|
|
|
|
Key oldClusterKey = opts.connectionFile->getConnectionString().clusterKey();
|
|
|
|
f = stopAfter(coordChangeClusterKey(opts.dataFolder, newClusterKey, oldClusterKey));
|
|
|
|
g_network->run();
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int rc = FDB_EXIT_SUCCESS;
|
|
|
|
if (f.isValid() && f.isReady() && !f.isError() && !f.get().present()) {
|
|
|
|
rc = FDB_EXIT_ERROR;
|
|
|
|
}
|
|
|
|
|
2019-05-11 05:01:52 +08:00
|
|
|
int unseed = noUnseed ? 0 : deterministicRandom()->randomInt(0, 100001);
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent("ElapsedTime")
|
|
|
|
.detail("SimTime", now() - startNow)
|
|
|
|
.detail("RealTime", timer() - start)
|
|
|
|
.detail("RandomUnseed", unseed);
|
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::Simulation) {
|
2017-05-26 04:48:44 +08:00
|
|
|
printf("Unseed: %d\n", unseed);
|
|
|
|
printf("Elapsed: %f simsec, %f real seconds\n", now() - startNow, timer() - start);
|
|
|
|
}
|
|
|
|
|
|
|
|
// IFailureMonitor::failureMonitor().address_info.clear();
|
|
|
|
|
|
|
|
// we should have shut down ALL actors associated with this machine; let's list all of the ones still live
|
|
|
|
/*{
|
|
|
|
auto living = Actor::all;
|
|
|
|
printf("%d surviving actors:\n", living.size());
|
|
|
|
for(auto a = living.begin(); a != living.end(); ++a)
|
|
|
|
printf(" #%lld %s %p\n", (*a)->creationIndex, (*a)->getName(), (*a));
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
auto living = DatabaseContext::all;
|
|
|
|
printf("%d surviving DatabaseContexts:\n", living.size());
|
|
|
|
for(auto a = living.begin(); a != living.end(); ++a)
|
|
|
|
printf(" #%lld %p\n", (*a)->creationIndex, (*a));
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
auto living = TransactionData::all;
|
|
|
|
printf("%d surviving TransactionData(s):\n", living.size());
|
|
|
|
for(auto a = living.begin(); a != living.end(); ++a)
|
|
|
|
printf(" #%lld %p\n", (*a)->creationIndex, (*a));
|
|
|
|
}*/
|
|
|
|
|
2021-09-17 08:42:34 +08:00
|
|
|
/*cout << Actor::allActors.size() << " surviving actors:" << std::endl;
|
2017-05-26 04:48:44 +08:00
|
|
|
std::map<std::string,int> actorCount;
|
|
|
|
for(int i=0; i<Actor::allActors.size(); i++)
|
|
|
|
++actorCount[Actor::allActors[i]->getName()];
|
|
|
|
for(auto i = actorCount.rbegin(); !(i == actorCount.rend()); ++i)
|
2021-09-17 08:42:34 +08:00
|
|
|
std::cout << " " << i->second << " " << i->first << std::endl;*/
|
|
|
|
// std::cout << " " << Actor::allActors[i]->getName() << std::endl;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-11-15 15:06:46 +08:00
|
|
|
if (role == ServerRole::Simulation) {
|
2017-12-16 10:20:33 +08:00
|
|
|
unsigned long sevErrorEventsLogged = TraceEvent::CountEventsLoggedAt(SevError);
|
|
|
|
if (sevErrorEventsLogged > 0) {
|
|
|
|
printf("%lu SevError events logged\n", sevErrorEventsLogged);
|
|
|
|
rc = FDB_EXIT_ERROR;
|
|
|
|
}
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2022-09-15 08:10:49 +08:00
|
|
|
// g_simulator->run();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
#ifdef ALLOC_INSTRUMENTATION
|
|
|
|
{
|
|
|
|
std::cout << "Page Counts: " << FastAllocator<16>::pageCount << " " << FastAllocator<32>::pageCount << " "
|
|
|
|
<< FastAllocator<64>::pageCount << " " << FastAllocator<128>::pageCount << " "
|
|
|
|
<< FastAllocator<256>::pageCount << " " << FastAllocator<512>::pageCount << " "
|
|
|
|
<< FastAllocator<1024>::pageCount << " " << FastAllocator<2048>::pageCount << " "
|
2019-03-09 04:37:04 +08:00
|
|
|
<< FastAllocator<4096>::pageCount << " " << FastAllocator<8192>::pageCount << " "
|
2020-06-26 11:44:43 +08:00
|
|
|
<< FastAllocator<16384>::pageCount << std::endl;
|
2021-03-11 02:06:03 +08:00
|
|
|
|
2021-09-17 08:42:34 +08:00
|
|
|
std::vector<std::pair<std::string, const char*>> typeNames;
|
2017-05-26 04:48:44 +08:00
|
|
|
for (auto i = allocInstr.begin(); i != allocInstr.end(); ++i) {
|
|
|
|
std::string s;
|
|
|
|
|
|
|
|
#ifdef __linux__
|
2020-08-19 05:18:50 +08:00
|
|
|
char* demangled = abi::__cxa_demangle(i->first, nullptr, nullptr, nullptr);
|
2017-05-26 04:48:44 +08:00
|
|
|
if (demangled) {
|
|
|
|
s = demangled;
|
2022-09-20 02:35:58 +08:00
|
|
|
if (StringRef(s).startsWith("(anonymous namespace)::"_sr))
|
|
|
|
s = s.substr("(anonymous namespace)::"_sr.size());
|
2017-05-26 04:48:44 +08:00
|
|
|
free(demangled);
|
|
|
|
} else
|
|
|
|
s = i->first;
|
|
|
|
#else
|
|
|
|
s = i->first;
|
2022-09-20 02:35:58 +08:00
|
|
|
if (StringRef(s).startsWith("class `anonymous namespace'::"_sr))
|
|
|
|
s = s.substr("class `anonymous namespace'::"_sr.size());
|
|
|
|
else if (StringRef(s).startsWith("class "_sr))
|
|
|
|
s = s.substr("class "_sr.size());
|
|
|
|
else if (StringRef(s).startsWith("struct "_sr))
|
|
|
|
s = s.substr("struct "_sr.size());
|
2017-05-26 04:48:44 +08:00
|
|
|
#endif
|
|
|
|
|
2021-05-11 07:32:02 +08:00
|
|
|
typeNames.emplace_back(s, i->first);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
std::sort(typeNames.begin(), typeNames.end());
|
|
|
|
for (int i = 0; i < typeNames.size(); i++) {
|
|
|
|
const char* n = typeNames[i].second;
|
|
|
|
auto& f = allocInstr[n];
|
|
|
|
printf("%+d\t%+d\t%d\t%d\t%s\n",
|
|
|
|
f.allocCount,
|
|
|
|
-f.deallocCount,
|
|
|
|
f.allocCount - f.deallocCount,
|
|
|
|
f.maxAllocated,
|
|
|
|
typeNames[i].first.c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
// We're about to exit and clean up data structures; this will wreak havoc on allocation recording
|
|
|
|
memSample_entered = true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
// printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
|
|
|
|
flushAndExit(rc);
|
|
|
|
} catch (Error& e) {
|
|
|
|
fprintf(stderr, "Error: %s\n", e.what());
|
|
|
|
TraceEvent(SevError, "MainError").error(e);
|
|
|
|
// printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
|
|
|
|
flushAndExit(FDB_EXIT_MAIN_ERROR);
|
2020-02-21 08:53:01 +08:00
|
|
|
} catch (boost::system::system_error& e) {
|
2020-02-27 04:26:43 +08:00
|
|
|
ASSERT_WE_THINK(false); // boost errors shouldn't leak
|
2020-02-21 08:53:01 +08:00
|
|
|
fprintf(stderr, "boost::system::system_error: %s (%d)", e.what(), e.code().value());
|
|
|
|
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
|
|
|
// printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
|
|
|
|
flushAndExit(FDB_EXIT_MAIN_EXCEPTION);
|
2017-05-26 04:48:44 +08:00
|
|
|
} catch (std::exception& e) {
|
|
|
|
fprintf(stderr, "std::exception: %s\n", e.what());
|
2018-06-09 02:11:08 +08:00
|
|
|
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
2017-05-26 04:48:44 +08:00
|
|
|
// printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
|
|
|
|
flushAndExit(FDB_EXIT_MAIN_EXCEPTION);
|
|
|
|
}
|
|
|
|
|
|
|
|
static_assert(LBLocalityData<StorageServerInterface>::Present, "Storage server interface should be load balanced");
|
2020-09-11 08:44:15 +08:00
|
|
|
static_assert(LBLocalityData<CommitProxyInterface>::Present, "Commit proxy interface should be load balanced");
|
|
|
|
static_assert(LBLocalityData<GrvProxyInterface>::Present, "GRV proxy interface should be load balanced");
|
2017-05-26 04:48:44 +08:00
|
|
|
static_assert(LBLocalityData<TLogInterface>::Present, "TLog interface should be load balanced");
|
|
|
|
static_assert(!LBLocalityData<MasterInterface>::Present, "Master interface should not be load balanced");
|
|
|
|
}
|