2017-05-26 04:48:44 +08:00
/*
* Knobs . cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013 - 2018 Apple Inc . and the FoundationDB project authors
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License , Version 2.0 ( the " License " ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* http : //www.apache.org/licenses/LICENSE-2.0
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
*/
# include "Knobs.h"
# include "fdbrpc/Locality.h"
ServerKnobs const * SERVER_KNOBS = new ServerKnobs ( ) ;
# define init( knob, value ) initKnob( knob, value, #knob )
ServerKnobs : : ServerKnobs ( bool randomize , ClientKnobs * clientKnobs ) {
// TLogs
init ( TLOG_TIMEOUT , 0.4 ) ; //cannot buggify because of availability
init ( RECOVERY_TLOG_SMART_QUORUM_DELAY , 0.25 ) ; if ( randomize & & BUGGIFY ) RECOVERY_TLOG_SMART_QUORUM_DELAY = 0.0 ; // smaller might be better for bug amplification
init ( TLOG_STORAGE_MIN_UPDATE_INTERVAL , 0.5 ) ;
init ( BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL , 30 ) ;
init ( UNFLUSHED_DATA_RATIO , 0.05 ) ; if ( randomize & & BUGGIFY ) UNFLUSHED_DATA_RATIO = 0.0 ;
init ( DESIRED_TOTAL_BYTES , 150000 ) ; if ( randomize & & BUGGIFY ) DESIRED_TOTAL_BYTES = 10000 ;
init ( MAXIMUM_PEEK_BYTES , 10e6 ) ;
init ( APPLY_MUTATION_BYTES , 1e6 ) ;
init ( RECOVERY_DATA_BYTE_LIMIT , 100000 ) ;
init ( BUGGIFY_RECOVERY_DATA_LIMIT , 1000 ) ;
init ( LONG_TLOG_COMMIT_TIME , 0.25 ) ; //cannot buggify because of recovery time
init ( LARGE_TLOG_COMMIT_BYTES , 4 < < 20 ) ;
init ( BUGGIFY_RECOVER_MEMORY_LIMIT , 1e6 ) ;
init ( BUGGIFY_WORKER_REMOVED_MAX_LAG , 30 ) ;
init ( UPDATE_STORAGE_BYTE_LIMIT , 1e6 ) ;
init ( TLOG_PEEK_DELAY , 0.00005 ) ;
init ( LEGACY_TLOG_UPGRADE_ENTRIES_PER_VERSION , 100 ) ;
init ( VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS , 1072 ) ; // Based on a naive interpretation of the gcc version of std::deque, we would expect this to be 16 bytes overhead per 512 bytes data. In practice, it seems to be 24 bytes overhead per 512.
init ( LOG_SYSTEM_PUSHED_DATA_BLOCK_SIZE , 1e5 ) ;
init ( MAX_MESSAGE_SIZE , std : : max < int > ( LOG_SYSTEM_PUSHED_DATA_BLOCK_SIZE , 1e5 + 2e4 + 1 ) + 8 ) ; // VALUE_SIZE_LIMIT + SYSTEM_KEY_SIZE_LIMIT + 9 bytes (4 bytes for length, 4 bytes for sequence number, and 1 byte for mutation type)
init ( TLOG_MESSAGE_BLOCK_BYTES , 10e6 ) ;
2017-06-29 07:52:45 +08:00
init ( TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR , double ( TLOG_MESSAGE_BLOCK_BYTES ) / ( TLOG_MESSAGE_BLOCK_BYTES - MAX_MESSAGE_SIZE ) ) ; //1.0121466709838096006362758832473
2017-09-19 02:04:51 +08:00
init ( PEEK_TRACKER_EXPIRATION_TIME , 600 ) ; if ( randomize & & BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = g_random - > coinflip ( ) ? 0.1 : 60 ;
2017-05-26 04:48:44 +08:00
init ( PARALLEL_GET_MORE_REQUESTS , 32 ) ; if ( randomize & & BUGGIFY ) PARALLEL_GET_MORE_REQUESTS = 2 ;
init ( MAX_QUEUE_COMMIT_BYTES , 15e6 ) ; if ( randomize & & BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000 ;
// Versions
init ( MAX_VERSIONS_IN_FLIGHT , 100000000 ) ;
init ( VERSIONS_PER_SECOND , 1000000 ) ;
init ( MAX_READ_TRANSACTION_LIFE_VERSIONS , 5 * VERSIONS_PER_SECOND ) ; if ( randomize & & BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = std : : max < int > ( 1 , 0.1 * VERSIONS_PER_SECOND ) ; else if ( randomize & & BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = 10 * VERSIONS_PER_SECOND ;
2017-06-15 07:43:53 +08:00
init ( MAX_WRITE_TRANSACTION_LIFE_VERSIONS , 5 * VERSIONS_PER_SECOND ) ; if ( randomize & & BUGGIFY ) MAX_WRITE_TRANSACTION_LIFE_VERSIONS = std : : max < int > ( 1 , 1 * VERSIONS_PER_SECOND ) ;
2017-05-26 04:48:44 +08:00
init ( MAX_COMMIT_BATCH_INTERVAL , 0.5 ) ; if ( randomize & & BUGGIFY ) MAX_COMMIT_BATCH_INTERVAL = 2.0 ; // Each master proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly
2018-01-10 09:26:42 +08:00
MAX_COMMIT_BATCH_INTERVAL = std : : min ( MAX_COMMIT_BATCH_INTERVAL , MAX_READ_TRANSACTION_LIFE_VERSIONS / double ( 2 * VERSIONS_PER_SECOND ) ) ; // Ensure that the proxy commits 2 times every MAX_READ_TRANSACTION_LIFE_VERSIONS, otherwise the master will not give out versions fast enough
2017-05-26 04:48:44 +08:00
// Data distribution queue
init ( HEALTH_POLL_TIME , 1.0 ) ;
init ( BEST_TEAM_STUCK_DELAY , 1.0 ) ;
init ( BG_DD_POLLING_INTERVAL , 10.0 ) ;
init ( DD_QUEUE_LOGGING_INTERVAL , 5.0 ) ;
init ( RELOCATION_PARALLELISM_PER_SOURCE_SERVER , 4 ) ; if ( randomize & & BUGGIFY ) RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1 ;
init ( DD_QUEUE_MAX_KEY_SERVERS , 100 ) ; if ( randomize & & BUGGIFY ) DD_QUEUE_MAX_KEY_SERVERS = 1 ;
init ( DD_REBALANCE_PARALLELISM , 50 ) ;
init ( DD_REBALANCE_RESET_AMOUNT , 30 ) ;
init ( BG_DD_MAX_WAIT , 120.0 ) ;
init ( BG_DD_MIN_WAIT , 0.1 ) ;
init ( BG_DD_INCREASE_RATE , 1.10 ) ;
init ( BG_DD_DECREASE_RATE , 1.02 ) ;
init ( BG_DD_SATURATION_DELAY , 1.0 ) ;
init ( INFLIGHT_PENALTY_HEALTHY , 1.0 ) ;
init ( INFLIGHT_PENALTY_UNHEALTHY , 10.0 ) ;
init ( INFLIGHT_PENALTY_ONE_LEFT , 1000.0 ) ;
// Data distribution
init ( RETRY_RELOCATESHARD_DELAY , 0.1 ) ;
init ( DATA_DISTRIBUTION_FAILURE_REACTION_TIME , 10.0 ) ; if ( randomize & & BUGGIFY ) DATA_DISTRIBUTION_FAILURE_REACTION_TIME = 1.0 ;
bool buggifySmallShards = randomize & & BUGGIFY ;
init ( MIN_SHARD_BYTES , 200000 ) ; if ( buggifySmallShards ) MIN_SHARD_BYTES = 40000 ; //FIXME: data distribution tracker (specifically StorageMetrics) relies on this number being larger than the maximum size of a key value pair
init ( SHARD_BYTES_RATIO , 4 ) ;
init ( SHARD_BYTES_PER_SQRT_BYTES , 45 ) ; if ( buggifySmallShards ) SHARD_BYTES_PER_SQRT_BYTES = 0 ; //Approximately 10000 bytes per shard
init ( MAX_SHARD_BYTES , 500000000 ) ;
2017-10-28 02:52:11 +08:00
init ( KEY_SERVER_SHARD_BYTES , 500000000 ) ;
2017-05-26 04:48:44 +08:00
bool buggifySmallBandwidthSplit = randomize & & BUGGIFY ;
init ( SHARD_MAX_BYTES_PER_KSEC , 1LL * 1000000 * 1000 ) ; if ( buggifySmallBandwidthSplit ) SHARD_MAX_BYTES_PER_KSEC = 10LL * 1000 * 1000 ;
/* 10*1MB/sec * 1000sec/ksec
Shards with more than this bandwidth will be split immediately .
For a large shard ( 100 MB ) , splitting it costs ~ 100 MB of work or about 10 MB / sec over a 10 sec sampling window .
If the sampling window is too much longer , the MVCC window will fill up while we wait .
If SHARD_MAX_BYTES_PER_KSEC is too much lower , we could do a lot of data movement work in response to a small impulse of bandwidth .
If SHARD_MAX_BYTES_PER_KSEC is too high relative to the I / O bandwidth of a given server , a workload can remain concentrated on a single
team indefinitely , limiting performance .
*/
init ( SHARD_MIN_BYTES_PER_KSEC , 100 * 1000 * 1000 ) ; if ( buggifySmallBandwidthSplit ) SHARD_MIN_BYTES_PER_KSEC = 200 * 1 * 1000 ;
/* 200*1KB/sec * 1000sec/ksec
Shards with more than this bandwidth will not be merged .
Obviously this needs to be significantly less than SHARD_MAX_BYTES_PER_KSEC , else we will repeatedly merge and split .
It should probably be significantly less than SHARD_SPLIT_BYTES_PER_KSEC , else we will merge right after splitting .
The number of extra shards in the database because of bandwidth splitting can ' t be more than about W / SHARD_MIN_BYTES_PER_KSEC , where
W is the maximum bandwidth of the entire database in bytes / ksec . For 250 MB / sec write bandwidth , ( 250 MB / sec ) / ( 200 KB / sec ) = 1250 extra
shards .
The bandwidth sample maintained by the storage server needs to be accurate enough to reliably measure this minimum bandwidth . See
BANDWIDTH_UNITS_PER_SAMPLE . If this number is too low , the storage server needs to spend more memory and time on sampling .
*/
init ( SHARD_SPLIT_BYTES_PER_KSEC , 250 * 1000 * 1000 ) ; if ( buggifySmallBandwidthSplit ) SHARD_SPLIT_BYTES_PER_KSEC = 50 * 1000 * 1000 ;
/* 500*1KB/sec * 1000sec/ksec
When splitting a shard , it is split into pieces with less than this bandwidth .
Obviously this should be less than half of SHARD_MAX_BYTES_PER_KSEC .
Smaller values mean that high bandwidth shards are split into more pieces , more quickly utilizing large numbers of servers to handle the
bandwidth .
Too many pieces ( too small a value ) may stress data movement mechanisms ( see e . g . RELOCATION_PARALLELISM_PER_SOURCE_SERVER ) .
If this value is too small relative to SHARD_MIN_BYTES_PER_KSEC immediate merging work will be generated .
*/
2017-09-19 02:04:51 +08:00
init ( STORAGE_METRIC_TIMEOUT , 600.0 ) ; if ( randomize & & BUGGIFY ) STORAGE_METRIC_TIMEOUT = g_random - > coinflip ( ) ? 10.0 : 60.0 ;
2017-05-26 04:48:44 +08:00
init ( METRIC_DELAY , 0.1 ) ; if ( randomize & & BUGGIFY ) METRIC_DELAY = 1.0 ;
init ( ALL_DATA_REMOVED_DELAY , 1.0 ) ;
init ( INITIAL_FAILURE_REACTION_DELAY , 30.0 ) ; if ( randomize & & BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0 ;
init ( LOG_ON_COMPLETION_DELAY , DD_QUEUE_LOGGING_INTERVAL ) ;
init ( BEST_TEAM_MAX_TEAM_TRIES , 10 ) ;
init ( BEST_TEAM_OPTION_COUNT , 4 ) ;
init ( BEST_OF_AMT , 4 ) ;
init ( SERVER_LIST_DELAY , 1.0 ) ;
init ( RECRUITMENT_IDLE_DELAY , 1.0 ) ;
init ( STORAGE_RECRUITMENT_DELAY , 0.5 ) ;
init ( DATA_DISTRIBUTION_LOGGING_INTERVAL , 5.0 ) ;
init ( DD_ENABLED_CHECK_DELAY , 1.0 ) ;
init ( DD_MERGE_COALESCE_DELAY , 120.0 ) ; if ( randomize & & BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001 ;
init ( STORAGE_METRICS_POLLING_DELAY , 2.0 ) ; if ( randomize & & BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0 ;
init ( STORAGE_METRICS_RANDOM_DELAY , 0.2 ) ;
init ( FREE_SPACE_RATIO_CUTOFF , 0.1 ) ;
init ( FREE_SPACE_RATIO_DD_CUTOFF , 0.2 ) ;
init ( DESIRED_TEAMS_PER_SERVER , 5 ) ; if ( randomize & & BUGGIFY ) DESIRED_TEAMS_PER_SERVER = 1 ;
init ( DD_SHARD_SIZE_GRANULARITY , 5000000 ) ;
init ( DD_SHARD_SIZE_GRANULARITY_SIM , 500000 ) ; if ( randomize & & BUGGIFY ) DD_SHARD_SIZE_GRANULARITY_SIM = 0 ;
init ( DD_MOVE_KEYS_PARALLELISM , 20 ) ; if ( randomize & & BUGGIFY ) DD_MOVE_KEYS_PARALLELISM = 1 ;
init ( DD_MERGE_LIMIT , 2000 ) ; if ( randomize & & BUGGIFY ) DD_MERGE_LIMIT = 2 ;
init ( DD_SHARD_METRICS_TIMEOUT , 60.0 ) ; if ( randomize & & BUGGIFY ) DD_SHARD_METRICS_TIMEOUT = 0.1 ;
init ( DD_LOCATION_CACHE_SIZE , 2000000 ) ; if ( randomize & & BUGGIFY ) DD_LOCATION_CACHE_SIZE = 3 ;
init ( MOVEKEYS_LOCK_POLLING_DELAY , 5.0 ) ;
init ( DEBOUNCE_RECRUITING_DELAY , 5.0 ) ;
// KeyValueStore SQLITE
init ( CLEAR_BUFFER_SIZE , 20000 ) ;
init ( READ_VALUE_TIME_ESTIMATE , .00005 ) ;
init ( READ_RANGE_TIME_ESTIMATE , .00005 ) ;
init ( SET_TIME_ESTIMATE , .00005 ) ;
init ( CLEAR_TIME_ESTIMATE , .00005 ) ;
init ( COMMIT_TIME_ESTIMATE , .005 ) ;
init ( CHECK_FREE_PAGE_AMOUNT , 100 ) ; if ( randomize & & BUGGIFY ) CHECK_FREE_PAGE_AMOUNT = 5 ;
init ( DISK_METRIC_LOGGING_INTERVAL , 5.0 ) ;
init ( SOFT_HEAP_LIMIT , 300e6 ) ;
init ( SQLITE_PAGE_SCAN_ERROR_LIMIT , 10000 ) ;
init ( SQLITE_BTREE_PAGE_USABLE , 4096 - 8 ) ; // pageSize - reserveSize for page checksum
// Maximum and minimum cell payload bytes allowed on primary page as calculated in SQLite.
// These formulas are copied from SQLite, using its hardcoded constants, so if you are
// changing this you should also be changing SQLite.
init ( SQLITE_BTREE_CELL_MAX_LOCAL , ( SQLITE_BTREE_PAGE_USABLE - 12 ) * 64 / 255 - 23 ) ;
init ( SQLITE_BTREE_CELL_MIN_LOCAL , ( SQLITE_BTREE_PAGE_USABLE - 12 ) * 32 / 255 - 23 ) ;
// Maximum FDB fragment key and value bytes that can fit in a primary btree page
init ( SQLITE_FRAGMENT_PRIMARY_PAGE_USABLE ,
SQLITE_BTREE_CELL_MAX_LOCAL
- 1 // vdbeRecord header length size
- 2 // max key length size
- 4 // max index length size
- 2 // max value fragment length size
) ;
// Maximum FDB fragment value bytes in an overflow page
init ( SQLITE_FRAGMENT_OVERFLOW_PAGE_USABLE ,
SQLITE_BTREE_PAGE_USABLE
- 4 // next pageNumber size
) ;
init ( SQLITE_FRAGMENT_MIN_SAVINGS , 0.20 ) ;
// KeyValueStoreSqlite spring cleaning
init ( CLEANING_INTERVAL , 1.0 ) ;
init ( SPRING_CLEANING_TIME_ESTIMATE , .010 ) ;
init ( SPRING_CLEANING_VACUUMS_PER_LAZY_DELETE_PAGE , 0.0 ) ; if ( randomize & & BUGGIFY ) SPRING_CLEANING_VACUUMS_PER_LAZY_DELETE_PAGE = g_random - > coinflip ( ) ? 1e9 : g_random - > random01 ( ) * 5 ;
init ( SPRING_CLEANING_MIN_LAZY_DELETE_PAGES , 0 ) ; if ( randomize & & BUGGIFY ) SPRING_CLEANING_MIN_LAZY_DELETE_PAGES = g_random - > randomInt ( 1 , 100 ) ;
init ( SPRING_CLEANING_MAX_LAZY_DELETE_PAGES , 1e9 ) ; if ( randomize & & BUGGIFY ) SPRING_CLEANING_MAX_LAZY_DELETE_PAGES = g_random - > coinflip ( ) ? 0 : g_random - > randomInt ( 1 , 1e4 ) ;
init ( SPRING_CLEANING_LAZY_DELETE_BATCH_SIZE , 100 ) ; if ( randomize & & BUGGIFY ) SPRING_CLEANING_LAZY_DELETE_BATCH_SIZE = g_random - > randomInt ( 1 , 1000 ) ;
init ( SPRING_CLEANING_MIN_VACUUM_PAGES , 1 ) ; if ( randomize & & BUGGIFY ) SPRING_CLEANING_MIN_VACUUM_PAGES = g_random - > randomInt ( 0 , 100 ) ;
init ( SPRING_CLEANING_MAX_VACUUM_PAGES , 1e9 ) ; if ( randomize & & BUGGIFY ) SPRING_CLEANING_MAX_VACUUM_PAGES = g_random - > coinflip ( ) ? 0 : g_random - > randomInt ( 1 , 1e4 ) ;
// Leader election
bool longLeaderElection = randomize & & BUGGIFY ;
init ( CANDIDATE_MIN_DELAY , 0.05 ) ;
init ( CANDIDATE_MAX_DELAY , 1.0 ) ;
init ( CANDIDATE_GROWTH_RATE , 1.2 ) ;
init ( POLLING_FREQUENCY , 1.0 ) ; if ( longLeaderElection ) POLLING_FREQUENCY = 8.0 ;
init ( HEARTBEAT_FREQUENCY , 0.25 ) ; if ( longLeaderElection ) HEARTBEAT_FREQUENCY = 1.0 ;
// Master Proxy
init ( START_TRANSACTION_BATCH_INTERVAL_MIN , 1e-6 ) ;
init ( START_TRANSACTION_BATCH_INTERVAL_MAX , 0.010 ) ;
init ( START_TRANSACTION_BATCH_INTERVAL_LATENCY_FRACTION , 0.5 ) ;
init ( START_TRANSACTION_BATCH_INTERVAL_SMOOTHER_ALPHA , 0.1 ) ;
init ( START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL , 0.001 ) ;
init ( START_TRANSACTION_MAX_TRANSACTIONS_TO_START , 10000 ) ;
init ( START_TRANSACTION_MAX_BUDGET_SIZE , 20 ) ; // Currently set to match CLIENT_KNOBS->MAX_BATCH_SIZE
init ( COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE , 0.0005 ) ; if ( randomize & & BUGGIFY ) COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE = 0.005 ;
init ( COMMIT_TRANSACTION_BATCH_INTERVAL_MIN , 0.001 ) ; if ( randomize & & BUGGIFY ) COMMIT_TRANSACTION_BATCH_INTERVAL_MIN = 0.1 ;
init ( COMMIT_TRANSACTION_BATCH_INTERVAL_MAX , 0.020 ) ;
init ( COMMIT_TRANSACTION_BATCH_INTERVAL_LATENCY_FRACTION , 0.1 ) ;
init ( COMMIT_TRANSACTION_BATCH_INTERVAL_SMOOTHER_ALPHA , 0.1 ) ;
init ( COMMIT_TRANSACTION_BATCH_COUNT_MAX , 32768 ) ; if ( randomize & & BUGGIFY ) COMMIT_TRANSACTION_BATCH_COUNT_MAX = 1000 ; // Do NOT increase this number beyond 32768, as CommitIds only budget 2 bytes for storing transaction id within each batch
// these settings disable batch bytes scaling. Try COMMIT_TRANSACTION_BATCH_BYTES_MAX=1e6, COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE=50000, COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER=0.5?
init ( COMMIT_TRANSACTION_BATCH_BYTES_MIN , 100000 ) ;
init ( COMMIT_TRANSACTION_BATCH_BYTES_MAX , 100000 ) ; if ( randomize & & BUGGIFY ) { COMMIT_TRANSACTION_BATCH_BYTES_MIN = COMMIT_TRANSACTION_BATCH_BYTES_MAX = 1000000 ; }
init ( COMMIT_TRANSACTION_BATCH_BYTES_SCALE_BASE , 100000 ) ;
init ( COMMIT_TRANSACTION_BATCH_BYTES_SCALE_POWER , 0.0 ) ;
init ( TRANSACTION_BUDGET_TIME , 0.050 ) ; if ( randomize & & BUGGIFY ) TRANSACTION_BUDGET_TIME = 0.0 ;
init ( RESOLVER_COALESCE_TIME , 1.0 ) ;
init ( BUGGIFIED_ROW_LIMIT , APPLY_MUTATION_BYTES ) ; if ( randomize & & BUGGIFY ) BUGGIFIED_ROW_LIMIT = g_random - > randomInt ( 3 , 30 ) ;
init ( PROXY_SPIN_DELAY , 0.01 ) ;
// Master Server
init ( MASTER_LOGGING_DELAY , 1.0 ) ;
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution)
// by delay()ing for this amount of time between accepted batches of TransactionRequests.
init ( COMMIT_SLEEP_TIME , 0.0001 ) ; if ( randomize & & BUGGIFY ) COMMIT_SLEEP_TIME = 0 ;
init ( MIN_BALANCE_TIME , 0.2 ) ;
init ( MIN_BALANCE_DIFFERENCE , 10000 ) ;
init ( SECONDS_BEFORE_NO_FAILURE_DELAY , 8 * 3600 ) ;
init ( MAX_TXS_SEND_MEMORY , 1e7 ) ; if ( randomize & & BUGGIFY ) MAX_TXS_SEND_MEMORY = 1e5 ;
// Resolver
init ( SAMPLE_OFFSET_PER_KEY , 100 ) ;
init ( SAMPLE_EXPIRATION_TIME , 1.0 ) ;
init ( SAMPLE_POLL_TIME , 0.1 ) ;
init ( RESOLVER_STATE_MEMORY_LIMIT , 1e6 ) ;
init ( LAST_LIMITED_RATIO , 0.6 ) ;
//Cluster Controller
init ( MASTER_FAILURE_REACTION_TIME , 0.4 ) ; if ( randomize & & BUGGIFY ) MASTER_FAILURE_REACTION_TIME = 10.0 ;
init ( MASTER_FAILURE_SLOPE_DURING_RECOVERY , 0.1 ) ;
init ( WORKER_COORDINATION_PING_DELAY , 60 ) ;
init ( SIM_SHUTDOWN_TIMEOUT , 10 ) ;
2017-09-19 02:04:51 +08:00
init ( SHUTDOWN_TIMEOUT , 600 ) ; if ( randomize & & BUGGIFY ) SHUTDOWN_TIMEOUT = 60.0 ;
2017-05-26 04:48:44 +08:00
init ( MASTER_SPIN_DELAY , 1.0 ) ; if ( randomize & & BUGGIFY ) MASTER_SPIN_DELAY = 10.0 ;
2018-02-10 08:48:55 +08:00
init ( CC_CHANGE_DELAY , 0.1 ) ;
2018-02-07 03:30:05 +08:00
init ( WAIT_FOR_GOOD_RECRUITMENT_DELAY , 0.1 ) ;
2018-02-07 10:11:45 +08:00
init ( ATTEMPT_RECRUITMENT_DELAY , 0.035 ) ;
2017-05-26 04:48:44 +08:00
init ( WORKER_FAILURE_TIME , 1.0 ) ; if ( randomize & & BUGGIFY ) WORKER_FAILURE_TIME = 10.0 ;
init ( CHECK_BETTER_MASTER_INTERVAL , 1.0 ) ; if ( randomize & & BUGGIFY ) CHECK_BETTER_MASTER_INTERVAL = 0.001 ;
2017-09-19 02:04:51 +08:00
init ( INCOMPATIBLE_PEERS_LOGGING_INTERVAL , 600 ) ; if ( randomize & & BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0 ;
2017-05-26 04:48:44 +08:00
init ( EXPECTED_MASTER_FITNESS , ProcessClass : : GoodFit ) ;
init ( EXPECTED_TLOG_FITNESS , ProcessClass : : GoodFit ) ;
2018-02-10 08:48:55 +08:00
init ( EXPECTED_LOG_ROUTER_FITNESS , ProcessClass : : GoodFit ) ;
2017-05-26 04:48:44 +08:00
init ( EXPECTED_PROXY_FITNESS , ProcessClass : : GoodFit ) ;
init ( EXPECTED_RESOLVER_FITNESS , ProcessClass : : GoodFit ) ;
2017-09-19 02:04:51 +08:00
init ( RECRUITMENT_TIMEOUT , 600 ) ; if ( randomize & & BUGGIFY ) RECRUITMENT_TIMEOUT = g_random - > coinflip ( ) ? 60.0 : 1.0 ;
2017-05-26 04:48:44 +08:00
init ( POLICY_RATING_TESTS , 200 ) ; if ( randomize & & BUGGIFY ) POLICY_RATING_TESTS = 20 ;
init ( POLICY_GENERATIONS , 100 ) ; if ( randomize & & BUGGIFY ) POLICY_GENERATIONS = 10 ;
//Move Keys
init ( SHARD_READY_DELAY , 0.25 ) ;
init ( SERVER_READY_QUORUM_INTERVAL , std : : min ( 1.0 , std : : min ( MAX_READ_TRANSACTION_LIFE_VERSIONS , MAX_WRITE_TRANSACTION_LIFE_VERSIONS ) / ( 5.0 * VERSIONS_PER_SECOND ) ) ) ;
init ( SERVER_READY_QUORUM_TIMEOUT , 15.0 ) ; if ( randomize & & BUGGIFY ) SERVER_READY_QUORUM_TIMEOUT = 1.0 ;
init ( REMOVE_RETRY_DELAY , 1.0 ) ;
init ( MOVE_KEYS_KRM_LIMIT , 2000 ) ; if ( randomize & & BUGGIFY ) MOVE_KEYS_KRM_LIMIT = 2 ;
init ( MOVE_KEYS_KRM_LIMIT_BYTES , 1e5 ) ; if ( randomize & & BUGGIFY ) MOVE_KEYS_KRM_LIMIT_BYTES = 5e4 ; //This must be sufficiently larger than CLIENT_KNOBS->KEY_SIZE_LIMIT (fdbclient/Knobs.h) to ensure that at least two entries will be returned from an attempt to read a key range map
init ( SKIP_TAGS_GROWTH_RATE , 2.0 ) ;
init ( MAX_SKIP_TAGS , 100 ) ;
//FdbServer
bool longReboots = randomize & & BUGGIFY ;
init ( MIN_REBOOT_TIME , 4.0 ) ; if ( longReboots ) MIN_REBOOT_TIME = 10.0 ;
init ( MAX_REBOOT_TIME , 5.0 ) ; if ( longReboots ) MAX_REBOOT_TIME = 20.0 ;
2017-10-13 08:49:41 +08:00
init ( LOG_DIRECTORY , " . " ) ; // Will be set to the command line flag.
2017-05-26 04:48:44 +08:00
//Ratekeeper
bool slowRateKeeper = randomize & & BUGGIFY ;
init ( SMOOTHING_AMOUNT , 1.0 ) ; if ( slowRateKeeper ) SMOOTHING_AMOUNT = 5.0 ;
init ( SLOW_SMOOTHING_AMOUNT , 10.0 ) ; if ( slowRateKeeper ) SLOW_SMOOTHING_AMOUNT = 50.0 ;
init ( METRIC_UPDATE_RATE , .1 ) ; if ( slowRateKeeper ) METRIC_UPDATE_RATE = 0.5 ;
bool smallStorageTarget = randomize & & BUGGIFY ;
init ( TARGET_BYTES_PER_STORAGE_SERVER , 1000e6 ) ; if ( smallStorageTarget ) TARGET_BYTES_PER_STORAGE_SERVER = 1000e3 ;
init ( SPRING_BYTES_STORAGE_SERVER , 100e6 ) ; if ( smallStorageTarget ) SPRING_BYTES_STORAGE_SERVER = 100e3 ;
bool smallTlogTarget = randomize & & BUGGIFY ;
init ( TARGET_BYTES_PER_TLOG , 2000e6 ) ; if ( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3 ;
init ( SPRING_BYTES_TLOG , 400e6 ) ; if ( smallTlogTarget ) SPRING_BYTES_TLOG = 200e3 ;
init ( TLOG_SPILL_THRESHOLD , 1500e6 ) ; if ( smallTlogTarget ) TLOG_SPILL_THRESHOLD = 1500e3 ; if ( randomize & & BUGGIFY ) TLOG_SPILL_THRESHOLD = 0 ;
init ( MAX_TRANSACTIONS_PER_BYTE , 1000 ) ;
init ( MIN_FREE_SPACE , 1e8 ) ;
init ( MIN_FREE_SPACE_RATIO , 0.05 ) ;
init ( MAX_TL_SS_VERSION_DIFFERENCE , 1e99 ) ; // if( randomize && BUGGIFY ) MAX_TL_SS_VERSION_DIFFERENCE = std::max(1.0, 0.25 * VERSIONS_PER_SECOND); // spring starts at half this value //FIXME: this knob causes ratekeeper to clamp on idle cluster in simulation that have a large number of logs
init ( MAX_MACHINES_FALLING_BEHIND , 1 ) ;
//Storage Metrics
init ( STORAGE_METRICS_AVERAGE_INTERVAL , 120.0 ) ;
init ( STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS , 1000.0 / STORAGE_METRICS_AVERAGE_INTERVAL ) ; // milliHz!
init ( SPLIT_JITTER_AMOUNT , 0.05 ) ; if ( randomize & & BUGGIFY ) SPLIT_JITTER_AMOUNT = 0.2 ;
init ( IOPS_UNITS_PER_SAMPLE , 10000 * 1000 / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 100 ) ;
init ( BANDWIDTH_UNITS_PER_SAMPLE , SHARD_MIN_BYTES_PER_KSEC / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 25 ) ;
//Storage Server
init ( STORAGE_LOGGING_DELAY , 5.0 ) ;
init ( STORAGE_SERVER_POLL_METRICS_DELAY , 1.0 ) ;
init ( FUTURE_VERSION_DELAY , 1.0 ) ; if ( randomize & & BUGGIFY ) FUTURE_VERSION_DELAY = 0.001 ;
init ( STORAGE_LIMIT_BYTES , 500000 ) ;
init ( BUGGIFY_LIMIT_BYTES , 1000 ) ;
init ( FETCH_BLOCK_BYTES , 2e6 ) ;
init ( FETCH_KEYS_PARALLELISM_BYTES , 5e6 ) ; if ( randomize & & BUGGIFY ) FETCH_KEYS_PARALLELISM_BYTES = 4e6 ;
init ( BUGGIFY_BLOCK_BYTES , 10000 ) ;
init ( STORAGE_HARD_LIMIT_BYTES , 1500e6 ) ; if ( randomize & & BUGGIFY ) STORAGE_HARD_LIMIT_BYTES = 1500e3 ;
init ( STORAGE_COMMIT_BYTES , 10000000 ) ; if ( randomize & & BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000 ;
init ( STORAGE_COMMIT_INTERVAL , 0.5 ) ; if ( randomize & & BUGGIFY ) STORAGE_COMMIT_INTERVAL = 2.0 ;
init ( UPDATE_SHARD_VERSION_INTERVAL , 0.25 ) ; if ( randomize & & BUGGIFY ) UPDATE_SHARD_VERSION_INTERVAL = 1.0 ;
init ( BYTE_SAMPLING_FACTOR , 250 ) ; //cannot buggify because of differences in restarting tests
init ( BYTE_SAMPLING_OVERHEAD , 100 ) ;
init ( MAX_STORAGE_SERVER_WATCH_BYTES , 100e6 ) ; if ( randomize & & BUGGIFY ) MAX_STORAGE_SERVER_WATCH_BYTES = 10e3 ;
init ( MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE , 1e9 ) ; if ( randomize & & BUGGIFY ) MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE = 1e3 ;
init ( LONG_BYTE_SAMPLE_RECOVERY_DELAY , 60.0 ) ;
//Wait Failure
init ( BUGGIFY_OUTSTANDING_WAIT_FAILURE_REQUESTS , 2 ) ;
init ( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS , 250 ) ; if ( randomize & & BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2 ;
init ( WAIT_FAILURE_DELAY_LIMIT , 1.0 ) ; if ( randomize & & BUGGIFY ) WAIT_FAILURE_DELAY_LIMIT = 5.0 ;
//Worker
init ( WORKER_LOGGING_INTERVAL , 5.0 ) ;
init ( INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING , 5.0 ) ;
// Test harness
init ( WORKER_POLL_DELAY , 1.0 ) ;
// Coordination
init ( COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL , 1.0 ) ; if ( randomize & & BUGGIFY ) COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL = 10.0 ;
// Buggification
init ( BUGGIFIED_EVENTUAL_CONSISTENCY , 1.0 ) ;
BUGGIFY_ALL_COORDINATION = false ; if ( randomize & & BUGGIFY ) { BUGGIFY_ALL_COORDINATION = true ; TraceEvent ( " BuggifyAllCoordination " ) ; }
// Status
init ( STATUS_MIN_TIME_BETWEEN_REQUESTS , 0.0 ) ;
init ( CONFIGURATION_ROWS_TO_FETCH , 20000 ) ;
2017-09-26 03:40:24 +08:00
// Timekeeper
2017-09-28 07:31:38 +08:00
init ( TIME_KEEPER_DELAY , 10 ) ;
2017-09-26 03:40:24 +08:00
init ( TIME_KEEPER_MAX_ENTRIES , 3600 * 24 * 30 * 6 ) ; if ( randomize & & BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2 ; }
2017-05-26 04:48:44 +08:00
if ( clientKnobs )
clientKnobs - > IS_ACCEPTABLE_DELAY = clientKnobs - > IS_ACCEPTABLE_DELAY * std : : min ( MAX_READ_TRANSACTION_LIFE_VERSIONS , MAX_WRITE_TRANSACTION_LIFE_VERSIONS ) / ( 5.0 * VERSIONS_PER_SECOND ) ;
}