Merge branch 'master' into fix-machine-id-parameter

This commit is contained in:
Evan Tschannen 2019-07-30 17:54:33 -07:00 committed by GitHub
commit 3b9e5aa651
28 changed files with 288 additions and 94 deletions

View File

@ -15,13 +15,22 @@ fi
# Step 1: glibc version
FAILED=0
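# Collect every distinct glibc symbol version the binary links against; the
# GLIBC_ prefix is stripped so verlte can compare plain version numbers against
# the maximum allowed version in $2.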
for i in $(objdump -T "$1" | awk '{print $5}' | grep GLIBC | sed 's/ *$//g' | sed 's/GLIBC_//' | sort | uniq); do
if ! verlte "$i" "$2"; then
echo "!!! WARNING: DEPENDENCY ON NEWER LIBC DETECTED !!!"
exit 1
if [[ $FAILED == 0 ]]; then
echo "!!! WARNING: DEPENDENCY ON NEWER LIBC DETECTED !!!"
fi
objdump -T "$1" | grep GLIBC_$i | awk '{print $5 " " $6}' | grep "^GLIBC" | sort | awk '$0="\t"$0'
FAILED=1
fi
done
if [[ $FAILED == 1 ]]; then
exit 1
fi
# Step 2: Other dynamic dependencies
for j in $(objdump -p "$1" | grep NEEDED | awk '{print $2}'); do

View File

@ -406,10 +406,8 @@ The following options apply to all commands:
``--blob_credentials <FILE>``
Use FILE as a :ref:`Blob Credential File<blob-credential-files>`. Can be used multiple times.
The following options apply to all commands except ``start``:
``-C <CLUSTER_FILE>``
Path to the cluster file that should be used to connect to the FoundationDB cluster you want to use. If not specified, a :ref:`default cluster file <default-cluster-file>` will be used.
``--dest_cluster_file <CONNFILE>``
Required. Path to the cluster file that should be used to connect to the FoundationDB cluster you are restoring to.
.. _restore-start:
@ -424,10 +422,6 @@ The ``start`` command will start a new restore on the specified (or default) tag
``-r <BACKUP_URL>``
Required. Specifies the Backup URL for the source backup data to restore to the database. The source data must be accessible by the ``backup_agent`` processes for the cluster.
``--dest_cluster_file <CONNFILE>``
Required. The backup data will be restored into this cluster.
``-w``
Wait for the restore to reach a final state (such as complete) before exiting. Prints a progress update every few seconds. Behavior is identical to that of the wait command.

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.1.11.pkg <https://www.foundationdb.org/downloads/6.1.11/macOS/installers/FoundationDB-6.1.11.pkg>`_
* `FoundationDB-6.1.12.pkg <https://www.foundationdb.org/downloads/6.1.12/macOS/installers/FoundationDB-6.1.12.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-clients_6.1.11-1_amd64.deb>`_
* `foundationdb-server-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-server_6.1.11-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.1.12-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.12/ubuntu/installers/foundationdb-clients_6.1.12-1_amd64.deb>`_
* `foundationdb-server-6.1.12-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.12/ubuntu/installers/foundationdb-server_6.1.12-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-clients-6.1.11-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-server-6.1.11-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.1.12-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.12/rhel6/installers/foundationdb-clients-6.1.12-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.1.12-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.12/rhel6/installers/foundationdb-server-6.1.12-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-clients-6.1.11-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-server-6.1.11-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.1.12-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.12/rhel7/installers/foundationdb-clients-6.1.12-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.1.12-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.12/rhel7/installers/foundationdb-server-6.1.12-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.1.11-x64.msi <https://www.foundationdb.org/downloads/6.1.11/windows/installers/foundationdb-6.1.11-x64.msi>`_
* `foundationdb-6.1.12-x64.msi <https://www.foundationdb.org/downloads/6.1.12/windows/installers/foundationdb-6.1.12-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
* `foundationdb-6.1.11.tar.gz <https://www.foundationdb.org/downloads/6.1.11/bindings/python/foundationdb-6.1.11.tar.gz>`_
* `foundationdb-6.1.12.tar.gz <https://www.foundationdb.org/downloads/6.1.12/bindings/python/foundationdb-6.1.12.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.1.11.gem <https://www.foundationdb.org/downloads/6.1.11/bindings/ruby/fdb-6.1.11.gem>`_
* `fdb-6.1.12.gem <https://www.foundationdb.org/downloads/6.1.12/bindings/ruby/fdb-6.1.12.gem>`_
Java 8+
-------
* `fdb-java-6.1.11.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11.jar>`_
* `fdb-java-6.1.11-javadoc.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11-javadoc.jar>`_
* `fdb-java-6.1.12.jar <https://www.foundationdb.org/downloads/6.1.12/bindings/java/fdb-java-6.1.12.jar>`_
* `fdb-java-6.1.12-javadoc.jar <https://www.foundationdb.org/downloads/6.1.12/bindings/java/fdb-java-6.1.12-javadoc.jar>`_
Go 1.11+
--------

View File

@ -2,6 +2,15 @@
Release Notes
#############
6.1.12
======
Fixes
-----
* Fixed a thread safety issue while writing large keys or values. `(Issue #1846) <https://github.com/apple/foundationdb/issues/1846>`_
* An untracked data distributor could prevent a newly recruited data distributor from being started. `(PR #1849) <https://github.com/apple/foundationdb/pull/1849>`_
6.1.11
======

View File

@ -7,21 +7,25 @@ Release Notes
Features
--------
* Improved team collection for data distribution that builds a balanced number of teams per server and guarantees that each server has at least one team. `(PR #1785) <https://github.com/apple/foundationdb/pull/1785>`_.
* Added the option to have data distribution ``FetchKeys`` run at a lower priority by setting the knob ``FETCH_KEYS_LOWER_PRIORITY``. `(PR #1791) <https://github.com/apple/foundationdb/pull/1791>`_.
* CMake is now our official build system. The Makefile based build system is deprecated.
* Added a local ratekeeper to throttle reads at a per-storage-process level. `(PR #1447) <https://github.com/apple/foundationdb/pull/1447>`_.
* FDB backups based on disk snapshots: the ability to take a cluster-level backup based on disk-level snapshots of storage servers, tlogs, and coordinators. `(PR #1733) <https://github.com/apple/foundationdb/pull/1733>`_.
* FoundationDB now uses the flatbuffers serialization format for all network messages by default. This can be controlled with the ``--object-serializer`` command line argument or the ``use_object_serializer`` network option. Note that network communication only works if each peer uses the same object serializer setting. `(PR #1090) <https://github.com/apple/foundationdb/pull/1090>`_.
Performance
-----------
* Use CRC32 checksum for SQLite pages. `(PR #1582) <https://github.com/apple/foundationdb/pull/1582>`_.
* Added a 96-byte fast allocator, so storage queue nodes use less memory. `(PR #1336) <https://github.com/apple/foundationdb/pull/1336>`_.
* Handle large packets better. `(PR #1684) <https://github.com/apple/foundationdb/pull/1684>`_.
* A new Transaction Log spilling implementation is now the default. Write bandwidth and latency will no longer degrade during storage server or remote region failures. `(PR #1731) <https://github.com/apple/foundationdb/pull/1731>`_.
* Log routers will prefer to peek from satellites at ``log_version >= 4``. `(PR #1795) <https://github.com/apple/foundationdb/pull/1795>`_.
* Spilled data can be consumed from transaction logs more quickly and with less overhead. `(PR #1584) <https://github.com/apple/foundationdb/pull/1584>`_.
* Improved the speed of recoveries on large clusters. `(PR #1729) <https://github.com/apple/foundationdb/pull/1729>`_.
Fixes
-----
@ -32,6 +36,7 @@ Fixes
* Do not set doBuildTeams in StorageServerTracker unless a storage server's interface changes, in order to avoid unnecessary work. `(PR #1779) <https://github.com/apple/foundationdb/pull/1779>`_.
* Data distribution will now pick a random destination when merging shards in the ``\xff`` keyspace. This avoids an issue with backup where the write-heavy mutation log shards could concentrate on a single process that has less data than everybody else. `(PR #1916) <https://github.com/apple/foundationdb/pull/1916>`_.
* Setting ``--machine_id`` (or ``-i``) for an ``fdbserver`` process now sets ``locality_machineid`` in addition to ``locality_zoneid``. `(PR #1928) <https://github.com/apple/foundationdb/pull/1928>`_.
* File descriptors opened by clients and servers set close-on-exec, if available on the platform. `(PR #1581) <https://github.com/apple/foundationdb/pull/1581>`_.
Status
------
@ -69,6 +74,7 @@ Other Changes
* Added two knobs ``LOAD_BALANCE_ZONE_ID_LOCALITY_ENABLED`` and ``LOAD_BALANCE_DC_ID_LOCALITY_ENABLED`` allowing locality-based decision-making to be toggled on/off during load balancing. `(PR #1820) <https://github.com/apple/foundationdb/pull/1820>`_.
* Ratekeeper will aggressively throttle when unable to fetch the list of storage servers for a considerable period of time. `(PR #1858) <https://github.com/apple/foundationdb/pull/1858>`_.
* ``fdbserver`` now accepts a comma-separated list of public and listen addresses. `(PR #1721) <https://github.com/apple/foundationdb/pull/1721>`_.
* ``CAUSAL_READ_RISKY`` has been enhanced to further reduce the chance of causally inconsistent reads. Existing users of ``CAUSAL_READ_RISKY`` may see increased GRV latency if proxies are distantly located from logs. `(PR #1841) <https://github.com/apple/foundationdb/pull/1841>`_.
Earlier release notes
---------------------

View File

@ -944,7 +944,7 @@ static void printBackupUsage(bool devhelp) {
printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n");
printf(" -k KEYS List of key ranges to backup.\n"
" If not specified, the entire database will be backed up.\n");
printf(" -n, --dryrun For start or restore operations, performs a trial run with no actual changes made.\n");
printf(" -n, --dryrun For backup start or restore start, performs a trial run with no actual changes made.\n");
printf(" --log Enables trace file logging for the CLI session.\n"
" --logdir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
@ -3485,27 +3485,31 @@ int main(int argc, char* argv[]) {
break;
case EXE_RESTORE:
if(dryRun) {
if(restoreType != RESTORE_START) {
fprintf(stderr, "Restore dry run only works for 'start' command\n");
return FDB_EXIT_ERROR;
}
// Must explicitly call trace file options handling if not calling Database::createDatabase()
initTraceFile();
}
else if(restoreType != RESTORE_START && !initCluster()) {
return FDB_EXIT_ERROR;
}
else {
if(restoreClusterFileDest.empty()) {
fprintf(stderr, "Restore destination cluster file must be specified explicitly.\n");
return FDB_EXIT_ERROR;
}
if(!fileExists(restoreClusterFileDest)) {
fprintf(stderr, "Restore destination cluster file '%s' does not exist.\n", restoreClusterFileDest.c_str());
return FDB_EXIT_ERROR;
}
try {
db = Database::createDatabase(restoreClusterFileDest, Database::API_VERSION_LATEST);
} catch(Error &e) {
fprintf(stderr, "Restore destination cluster file '%s' invalid: %s\n", restoreClusterFileDest.c_str(), e.what());
return FDB_EXIT_ERROR;
}
}
switch(restoreType) {

View File

@ -45,6 +45,7 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( COORDINATOR_RECONNECTION_DELAY, 1.0 );
init( CLIENT_EXAMPLE_AMOUNT, 20 );
init( MAX_CLIENT_STATUS_AGE, 1.0 );
init( MAX_CLIENT_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_CLIENT_PROXY_CONNECTIONS = 1;
// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin

View File

@ -44,6 +44,7 @@ public:
double COORDINATOR_RECONNECTION_DELAY;
int CLIENT_EXAMPLE_AMOUNT;
double MAX_CLIENT_STATUS_AGE;
int MAX_CLIENT_PROXY_CONNECTIONS;
// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin
double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)

View File

@ -667,6 +667,8 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration( Reference<ClusterCo
state int idx = 0;
state int successIdx = 0;
state Optional<double> incorrectTime;
state std::vector<UID> lastProxyUIDs;
deterministicRandom()->randomShuffle(addrs);
loop {
state ClientLeaderRegInterface clientLeaderServer( addrs[idx] );
@ -716,6 +718,22 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration( Reference<ClusterCo
info.hasConnected = true;
connFile->notifyConnected();
auto& ni = rep.get();
if(ni.proxies.size() > CLIENT_KNOBS->MAX_CLIENT_PROXY_CONNECTIONS) {
std::vector<UID> proxyUIDs;
for(auto& proxy : ni.proxies) {
proxyUIDs.push_back(proxy.id());
}
if(proxyUIDs != lastProxyUIDs) {
lastProxyUIDs = proxyUIDs;
deterministicRandom()->randomShuffle(ni.proxies);
ni.proxies.resize(CLIENT_KNOBS->MAX_CLIENT_PROXY_CONNECTIONS);
for(int i = 0; i < ni.proxies.size(); i++) {
TraceEvent("ClientConnectedProxy").detail("Proxy", ni.proxies[i].id());
}
}
}
clientInfo->set( rep.get() );
successIdx = idx;
} else if(idx == successIdx) {

View File

@ -729,11 +729,15 @@ ACTOR static Future<Void> switchConnectionFileImpl(Reference<ClusterConnectionFi
// Reset state from former cluster.
self->masterProxies.clear();
self->masterProxiesChangeTrigger.trigger();
self->minAcceptableReadVersion = std::numeric_limits<Version>::max();
self->invalidateCache(allKeys);
auto clearedClientInfo = self->clientInfo->get();
clearedClientInfo.proxies.clear();
clearedClientInfo.id = deterministicRandom()->randomUniqueID();
self->clientInfo->set(clearedClientInfo);
self->connectionFile->set(connFile);
state Database db(Reference<DatabaseContext>::addRef(self));
state Transaction tr(db);
loop {

View File

@ -1022,23 +1022,71 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("MachineMaxTeams", maxMachineTeams);
}
bool teamExists( vector<UID> &team ) {
int overlappingMembers( vector<UID> &team ) {
if (team.empty()) {
return false;
return 0;
}
int maxMatchingServers = 0;
UID& serverID = team[0];
for (auto& usedTeam : server_info[serverID]->teams) {
if (team == usedTeam->getServerIDs()) {
return true;
auto used = usedTeam->getServerIDs();
int teamIdx = 0;
int usedIdx = 0;
int matchingServers = 0;
while(teamIdx < team.size() && usedIdx < used.size()) {
if(team[teamIdx] == used[usedIdx]) {
matchingServers++;
teamIdx++;
usedIdx++;
} else if(team[teamIdx] < used[usedIdx]) {
teamIdx++;
} else {
usedIdx++;
}
}
ASSERT(matchingServers > 0);
maxMatchingServers = std::max(maxMatchingServers, matchingServers);
if(maxMatchingServers == team.size()) {
return maxMatchingServers;
}
}
return false;
return maxMatchingServers;
}
// SOMEDAY: when machineTeams is changed from vector to set, we may check the existence faster
bool machineTeamExists(vector<Standalone<StringRef>>& machineIDs) { return findMachineTeam(machineIDs).isValid(); }
int overlappingMachineMembers( vector<Standalone<StringRef>>& team ) {
if (team.empty()) {
return 0;
}
int maxMatchingServers = 0;
Standalone<StringRef>& serverID = team[0];
for (auto& usedTeam : machine_info[serverID]->machineTeams) {
auto used = usedTeam->machineIDs;
int teamIdx = 0;
int usedIdx = 0;
int matchingServers = 0;
while(teamIdx < team.size() && usedIdx < used.size()) {
if(team[teamIdx] == used[usedIdx]) {
matchingServers++;
teamIdx++;
usedIdx++;
} else if(team[teamIdx] < used[usedIdx]) {
teamIdx++;
} else {
usedIdx++;
}
}
ASSERT(matchingServers > 0);
maxMatchingServers = std::max(maxMatchingServers, matchingServers);
if(maxMatchingServers == team.size()) {
return maxMatchingServers;
}
}
return maxMatchingServers;
}
Reference<TCMachineTeamInfo> findMachineTeam(vector<Standalone<StringRef>>& machineIDs) {
if (machineIDs.empty()) {
@ -1421,10 +1469,12 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
ASSERT_WE_THINK(isMachineTeamHealthy(machineIDs));
std::sort(machineIDs.begin(), machineIDs.end());
if (machineTeamExists(machineIDs)) {
int overlap = overlappingMachineMembers(machineIDs);
if (overlap == machineIDs.size()) {
maxAttempts += 1;
continue;
}
score += SERVER_KNOBS->DD_OVERLAP_PENALTY*overlap;
// SOMEDAY: randomly pick one from teams with the lowest score
if (score < bestScore) {
@ -1853,7 +1903,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
ASSERT(serverTeam.size() == configuration.storageTeamSize);
std::sort(serverTeam.begin(), serverTeam.end());
if (teamExists(serverTeam)) {
int overlap = overlappingMembers(serverTeam);
if (overlap == serverTeam.size()) {
maxAttempts += 1;
continue;
}
@ -1861,7 +1912,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Pick the server team with smallest score in all attempts
// If we use different metric here, DD may oscillate infinitely in creating and removing teams.
// SOMEDAY: Improve the code efficiency by using reservoir algorithm
int score = 0;
int score = SERVER_KNOBS->DD_OVERLAP_PENALTY*overlap;
for (auto& server : serverTeam) {
score += server_info[server]->teams.size();
}
@ -2023,7 +2074,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("MachineTeamCount", self->machineTeams.size())
.detail("MachineCount", self->machine_info.size())
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);
self->lastBuildTeamsFailed = false;
if (teamsToBuild > 0 || self->notEnoughTeamsForAServer()) {
state vector<std::vector<UID>> builtTeams;

View File

@ -1111,13 +1111,30 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
return false;
}
std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
Promise<int64_t> req;
self->getAverageShardBytes.send( req );
state int64_t averageShardBytes = wait(req.getFuture());
state std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
if( !shards.size() )
return false;
state KeyRange moveShard = deterministicRandom()->randomChoice( shards );
StorageMetrics metrics = wait( brokenPromiseToNever( self->getShardMetrics.getReply(GetMetricsRequest(moveShard)) ) );
state KeyRange moveShard;
state StorageMetrics metrics;
state int retries = 0;
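// Sample random shards from the source team, keeping the largest one seen so
// far, and stop early once a shard bigger than the cluster's average shard
// size is found.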
while(retries < SERVER_KNOBS->REBALANCE_MAX_RETRIES) {
state KeyRange testShard = deterministicRandom()->randomChoice( shards );
StorageMetrics testMetrics = wait( brokenPromiseToNever( self->getShardMetrics.getReply(GetMetricsRequest(testShard)) ) );
if(testMetrics.bytes > metrics.bytes) {
moveShard = testShard;
metrics = testMetrics;
if(metrics.bytes > averageShardBytes) {
break;
}
}
retries++;
}
int64_t sourceBytes = sourceTeam->getLoadBytes(false);
int64_t destBytes = destTeam->getLoadBytes();
@ -1133,6 +1150,7 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
.detail("SourceBytes", sourceBytes)
.detail("DestBytes", destBytes)
.detail("ShardBytes", metrics.bytes)
.detail("AverageShardBytes", averageShardBytes)
.detail("SourceTeam", sourceTeam->getDesc())
.detail("DestTeam", destTeam->getDesc());

View File

@ -182,6 +182,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( DEBOUNCE_RECRUITING_DELAY, 5.0 );
init( DD_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) DD_FAILURE_TIME = 10.0;
init( DD_ZERO_HEALTHY_TEAM_DELAY, 1.0 );
init( REBALANCE_MAX_RETRIES, 100 );
init( DD_OVERLAP_PENALTY, 10000 );
// TeamRemover
TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true

View File

@ -141,6 +141,8 @@ public:
int64_t DD_LOCATION_CACHE_SIZE;
double MOVEKEYS_LOCK_POLLING_DELAY;
double DEBOUNCE_RECRUITING_DELAY;
int REBALANCE_MAX_RETRIES;
int DD_OVERLAP_PENALTY;
// TeamRemover to remove redundant teams
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor

View File

@ -62,7 +62,7 @@ bool setDDEnabled(bool status, UID snapUID) {
return true;
}
ACTOR Future<MoveKeysLock> takeMoveKeysLock( Database cx, UID masterId ) {
ACTOR Future<MoveKeysLock> takeMoveKeysLock( Database cx, UID ddId ) {
state Transaction tr(cx);
loop {
try {
@ -70,7 +70,7 @@ ACTOR Future<MoveKeysLock> takeMoveKeysLock( Database cx, UID masterId ) {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
if( !g_network->isSimulated() ) {
UID id(deterministicRandom()->randomUniqueID());
TraceEvent("TakeMoveKeysLockTransaction", masterId)
TraceEvent("TakeMoveKeysLockTransaction", ddId)
.detail("TransactionUID", id);
tr.debugTransaction( id );
}
@ -83,6 +83,8 @@ ACTOR Future<MoveKeysLock> takeMoveKeysLock( Database cx, UID masterId ) {
lock.prevWrite = readVal.present() ? BinaryReader::fromStringRef<UID>(readVal.get(), Unversioned()) : UID();
}
lock.myOwner = deterministicRandom()->randomUniqueID();
tr.set(moveKeysLockOwnerKey, BinaryWriter::toValue(lock.myOwner, Unversioned()));
wait(tr.commit());
return lock;
} catch (Error &e){
wait(tr.onError(e));

View File

@ -37,15 +37,14 @@ struct MoveKeysLock {
void serialize(Ar& ar) { serializer(ar, prevOwner, myOwner, prevWrite); }
};
ACTOR Future<MoveKeysLock> takeMoveKeysLock(Database cx, UID masterId);
// Calling moveKeys, etc with the return value of this actor ensures that no movekeys, etc
// has been executed by a different locker since takeMoveKeysLock().
// takeMoveKeysLock itself is a read-only operation - it does not conflict with other
// attempts to take the lock.
// has been executed by a different locker since takeMoveKeysLock(), as calling
// takeMoveKeysLock() updates "moveKeysLockOwnerKey" to a random UID.
ACTOR Future<MoveKeysLock> takeMoveKeysLock(Database cx, UID ddId);
Future<Void> checkMoveKeysLockReadOnly( Transaction* tr, MoveKeysLock lock );
// Checks that a moveKeysLock has not changed since it was taken
// This does not modify the moveKeysLock
Future<Void> checkMoveKeysLockReadOnly(Transaction* tr, MoveKeysLock lock);
bool isDDEnabled();
// checks if the in-memory DDEnabled flag is set
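// Illustrative sketch of the intended usage (hypothetical caller code): a data
// distributor takes the lock once at startup, then re-validates it inside every
// transaction that moves keys, so two distributors can never act concurrently:
//
//   state MoveKeysLock lock = wait(takeMoveKeysLock(cx, ddId)); // writes a fresh random owner UID
//   state Transaction tr(cx);
//   loop {
//       try {
//           wait(checkMoveKeysLockReadOnly(&tr, lock)); // fails if another locker overwrote the owner key
//           // ... move keys ...
//           wait(tr.commit());
//           break;
//       } catch(Error &e) {
//           wait(tr.onError(e));
//       }
//   }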

View File

@ -1214,6 +1214,10 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
}
ACTOR Future<Void> watchDegraded(TLogData* self) {
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
return Void();
}
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {

View File

@ -492,7 +492,11 @@ ACTOR Future<Void> repairDeadDatacenter(Database cx, Reference<AsyncVar<ServerDB
bool primaryDead = g_simulator.datacenterDead(g_simulator.primaryDcId);
bool remoteDead = g_simulator.datacenterDead(g_simulator.remoteDcId);
ASSERT(!primaryDead || !remoteDead);
//FIXME: the primary and remote can both be considered dead because excludes are not handled properly by the datacenterDead function
if(primaryDead && remoteDead) {
TraceEvent(SevWarnAlways, "CannotDisableFearlessConfiguration");
return Void();
}
if(primaryDead || remoteDead) {
TraceEvent(SevWarnAlways, "DisablingFearlessConfiguration").detail("Location", context).detail("Stage", "Repopulate").detail("RemoteDead", remoteDead).detail("PrimaryDead", primaryDead);
g_simulator.usableRegions = 1;

View File

@ -1589,6 +1589,10 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
}
ACTOR Future<Void> watchDegraded(TLogData* self) {
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
return Void();
}
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {

View File

@ -259,7 +259,7 @@ struct ArenaBlock : NonCopyable, ThreadSafeReferenceCounted<ArenaBlock>
if(FLOW_KNOBS && g_trace_depth == 0 && nondeterministicRandom()->random01() < (reqSize / FLOW_KNOBS->HUGE_ARENA_LOGGING_BYTES)) {
hugeArenaSample(reqSize);
}
g_hugeArenaMemory += reqSize;
g_hugeArenaMemory.fetch_add(reqSize);
// If the new block has less free space than the old block, make the old block depend on it
if (next && !next->isTiny() && next->unused() >= reqSize-dataSize) {
@ -296,7 +296,7 @@ struct ArenaBlock : NonCopyable, ThreadSafeReferenceCounted<ArenaBlock>
#ifdef ALLOC_INSTRUMENTATION
allocInstr[ "ArenaHugeKB" ].dealloc( (bigSize+1023)>>10 );
#endif
g_hugeArenaMemory -= bigSize;
g_hugeArenaMemory.fetch_sub(bigSize);
delete[] (uint8_t*)this;
}
}

View File

@ -82,21 +82,23 @@ void setFastAllocatorThreadInitFunction( ThreadInitFunction f ) {
threadInitFunction = f;
}
int64_t g_hugeArenaMemory = 0;
std::atomic<int64_t> g_hugeArenaMemory(0);
double hugeArenaLastLogged = 0;
std::map<std::string, std::pair<int,int>> hugeArenaTraces;
void hugeArenaSample(int size) {
auto& info = hugeArenaTraces[platform::get_backtrace()];
info.first++;
info.second+=size;
if(now() - hugeArenaLastLogged > FLOW_KNOBS->HUGE_ARENA_LOGGING_INTERVAL) {
for(auto& it : hugeArenaTraces) {
TraceEvent("HugeArenaSample").detail("Count", it.second.first).detail("Size", it.second.second).detail("Backtrace", it.first);
if(TraceEvent::isNetworkThread()) {
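// hugeArenaTraces and hugeArenaLastLogged are unsynchronized, so samples are
// only recorded on the network thread; huge arena allocations made on other
// threads are simply not sampled.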
auto& info = hugeArenaTraces[platform::get_backtrace()];
info.first++;
info.second+=size;
if(now() - hugeArenaLastLogged > FLOW_KNOBS->HUGE_ARENA_LOGGING_INTERVAL) {
for(auto& it : hugeArenaTraces) {
TraceEvent("HugeArenaSample").detail("Count", it.second.first).detail("Size", it.second.second).detail("Backtrace", it.first);
}
hugeArenaLastLogged = now();
hugeArenaTraces.clear();
}
hugeArenaLastLogged = now();
hugeArenaTraces.clear();
}
}

View File

@ -40,6 +40,7 @@
#include "flow/Hash3.h"
#include <assert.h>
#include <atomic>
#include <vector>
#include <cstdlib>
#include <cstdio>
@ -152,7 +153,7 @@ private:
static void releaseMagazine(void*);
};
extern int64_t g_hugeArenaMemory;
extern std::atomic<int64_t> g_hugeArenaMemory;
void hugeArenaSample(int size);
void releaseAllThreadMagazines();
int64_t getTotalUnusedAllocatedMemory();

View File

@ -297,7 +297,23 @@ you are holding the corresponding future.
### Flatbuffers/ObjectSerializer
1. Motivation and Goals
1. Introduction
The goal is to have a more robust serialization protocol. One feature of
flatbuffers is that you can add a new field to a network message without
requiring a protocol-incompatible upgrade. In order for this to work,
correctness must not depend on that field always being present. This can be
tested in simulation by randomly (use buggify) default-initializing that
field when deserializing. Once you make a protocol-incompatible upgrade you
can rely on the field always being present in the new protocol, just like
before. Currently we are using a custom flatbuffers implementation so
that we can present (roughly) the same serialization API as before.
Currently the ObjectSerializer is only used for network messages, but that
may change. Flatbuffers was selected because it is (relatively) simple
among protocols providing forwards/backwards compatibility, and its binary
format is [well documented](https://github.com/dvidelabs/flatcc/blob/master/doc/binary-format.md).
1. Correspondence to flatbuffers IDL
- Tables
```
@ -319,7 +335,7 @@ you are holding the corresponding future.
- Unions
```
// Flow type
using T = std::variant<A, B, C>;
using T = boost::variant<A, B, C>;
// IDL equivalent
union T { A, B, C}
@ -341,18 +357,59 @@ you are holding the corresponding future.
[T]
```
TODO finish documenting/implementing the following.
1. Vtables collected from default-constructed instances
1. Requirements (serialize must be cheap for a default-constructed instance, must have a serialize method or implement a trait.)
1. Traits/Concepts: vector_like, union_like, dynamic_size, scalar
1. isDeserializing idiom
1. Gotchas (serialize gets called more than once on save path, maybe more)
1. Flatbuffers Traits
In order to serialize a type as a flatbuffers vector, struct, or union, you can implement the appropriate trait for your type.
- `scalar_traits` corresponds to a flatbuffers struct. See `UID` for an example.
- `vector_like_traits` corresponds to a flatbuffers vector. See `VectorRef` for an example.
- `dynamic_size_traits` corresponds to a flatbuffers vector of uint8_t. See `StringRef` for an example.
- `union_like_traits` corresponds to a flatbuffers union. See `boost::variant` for an example.
1. Potential Gotchas
- Flatbuffers 'vtables' are collected from default-constructed instances of
each type. Consequently types serialized by flatbuffers should have cheap
default constructors. Future work: we may be able to collect vtables
without an instance of a type using `declval`.
- `T::serialize` may get called multiple times when serializing `T`. It is
guaranteed to be called only once for deserialization though, and thus
the `Ar::isDeserializing` idiom is appropriate. Future work: in theory we
don't need to call `T::serialize` multiple times when serializing, but
this would complicate the implementation.
- In a call to `serializer`, arenas must come after any members whose memory
the arena owns. It's safe to reorder an arena in a `serializer` call
because arenas are ignored for the flatbuffers schema. (Future work)
Enforce that no fields appear after an arena at compile time.
1. File identifiers
[File identifiers](https://google.github.io/flatbuffers/md__schemas.html)
are used to sanity check that the message you're deserializing is of the
schema you expect. You can give a type `T` a file identifier by making
`T::file_identifier` a static member of type `FileIdentifier`. If you don't
control `T`, you can specialize the `FileIdentifierFor` template. See
`flow/FileIdentifier.h` for examples. You don't need to change the file
identifier for a type when evolving its schema.
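For example, a hypothetical message type (the identifier value is arbitrary,
but should be unique per type):
```
struct PingMessage {
	constexpr static FileIdentifier file_identifier = 1300419; // hypothetical value
	double sentTime = 0;

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, sentTime);
	}
};
```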
1. Schema evolution
1. Testing plan: have buggify sometimes default initialize fields that are introduced without changing the protocol version.
1. (Future work) Allow ObjectSerializer to take the usual version specifications, `IncludeVersion`, `AssumeVersion`, or `Unversioned`.
1. (Future work) Smaller messages for deprecated fields
1. (Future work) `Deprecated<...>` template that knows whether or not the field was present? Automatically buggifies the field being absent?
Two schemas are forward/backward compatible if they meet the following
requirements. (Future work) Any fields that are not common to both schemas should be
default-initialized in deserialized messages; currently they will be left
uninitialized if their default constructor doesn't initialize them.
- Two tables are compatible if one table's fields are all compatible with a prefix of the other table's fields.
- Two vectors are compatible if their element types are compatible.
- Two unions are compatible if one union's fields are all compatible with a prefix of the other union's fields.
- Two scalar types are only compatible if they are equal.
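For example, appending a field keeps two table schemas compatible under the
prefix rule above (a hypothetical sketch in the same Flow style as earlier
examples):
```
// Old Flow type
struct ClientReply {
	int64_t version;
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, version);
	}
};

// Evolved Flow type: appends one field, so each schema's fields are a prefix of
// the other's. Old readers ignore `throttled`; new readers must tolerate it
// being default-initialized when deserializing messages from old peers.
struct ClientReply {
	int64_t version;
	bool throttled = false;
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, version, throttled);
	}
};
```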
1. Deprecation
Flatbuffers allows fields to be deprecated, and a deprecated field consumes
only two bytes on the wire. (Future work) Introduce `Deprecated<...>`
template or something similar so that we can write smaller messages for
deprecated fields.
### ACTOR return values

View File

@ -115,7 +115,7 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
.DETAILALLOCATORMEMUSAGE(2048)
.DETAILALLOCATORMEMUSAGE(4096)
.DETAILALLOCATORMEMUSAGE(8192)
.detail("HugeArenaMemory", g_hugeArenaMemory);
.detail("HugeArenaMemory", g_hugeArenaMemory.load());
TraceEvent n("NetworkMetrics");
n

View File

@ -43,7 +43,7 @@
#undef min
#endif
int g_trace_depth = 0;
thread_local int g_trace_depth = 0;
class DummyThreadPool : public IThreadPool, ReferenceCounted<DummyThreadPool> {
public:

View File

@ -42,7 +42,7 @@ inline int fastrand() {
//inline static bool TRACE_SAMPLE() { return fastrand()<16; }
inline static bool TRACE_SAMPLE() { return false; }
extern int g_trace_depth;
extern thread_local int g_trace_depth;
enum Severity {
SevSample=1,

View File

@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)'
Id='{58285A17-7601-4E68-B41C-E6BD0ED36743}'
Id='{B713B0DA-1E6D-4F25-914B-6014E7C3710D}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)'

View File

@ -2,10 +2,12 @@
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(SolutionDir)versions.target" />
<PropertyGroup Condition="'$(Release)' != 'true' ">
<SuppressValidation>true</SuppressValidation>
<PreReleaseDecoration>-PRERELEASE</PreReleaseDecoration>
<PreReleaseVersion>.0</PreReleaseVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Release)' == 'true' ">
<SuppressValidation>true</SuppressValidation>
<PreReleaseDecoration>
</PreReleaseDecoration>
<PreReleaseVersion>.1</PreReleaseVersion>