Merge branch 'release-6.2' of github.com:apple/foundationdb into feature-redwood

commit 0e51a248b4
Stephen Atherton, 2019-10-23 10:12:54 -07:00
45 changed files with 988 additions and 462 deletions

View File

@@ -18,7 +18,7 @@
 # limitations under the License.
 cmake_minimum_required(VERSION 3.12)
 project(foundationdb
-  VERSION 6.2.5
+  VERSION 6.2.7
   DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions."
   HOMEPAGE_URL "http://www.foundationdb.org/"
   LANGUAGES C CXX ASM)

View File

@@ -107,12 +107,10 @@ fdb_error_t fdb_network_set_option( FDBNetworkOption option,
 	API->setNetworkOption( (FDBNetworkOptions::Option)option, value ? StringRef( value, value_length ) : Optional<StringRef>() ); );
 }

-extern "C"
 fdb_error_t fdb_setup_network_impl() {
 	CATCH_AND_RETURN( API->setupNetwork(); );
 }

-extern "C"
 fdb_error_t fdb_setup_network_v13( const char* localAddress ) {
 	fdb_error_t errorCode = fdb_network_set_option( FDB_NET_OPTION_LOCAL_ADDRESS, (uint8_t const*)localAddress, strlen(localAddress) );
 	if(errorCode != 0)
@@ -159,7 +157,6 @@ fdb_error_t fdb_future_block_until_ready( FDBFuture* f ) {
 	CATCH_AND_RETURN( TSAVB(f)->blockUntilReady(); );
 }

-extern "C" DLLEXPORT
 fdb_bool_t fdb_future_is_error_v22( FDBFuture* f ) {
 	return TSAVB(f)->isError();
 }
@@ -200,12 +197,10 @@ fdb_error_t fdb_future_set_callback( FDBFuture* f,
 	CATCH_AND_RETURN( TSAVB(f)->callOrSetAsCallback( cb, ignore, 0 ); );
 }

-extern "C" DLLEXPORT
 fdb_error_t fdb_future_get_error_impl( FDBFuture* f ) {
 	return TSAVB(f)->getErrorCode();
 }

-extern "C" DLLEXPORT
 fdb_error_t fdb_future_get_error_v22( FDBFuture* f, const char** description ) {
 	if ( !( TSAVB(f)->isError() ) )
 		return error_code_future_not_error;
@@ -232,14 +227,12 @@ fdb_error_t fdb_future_get_key( FDBFuture* f, uint8_t const** out_key,
 					  *out_key_length = key.size(); );
 }

-extern "C" DLLEXPORT
 fdb_error_t fdb_future_get_cluster_v609( FDBFuture* f, FDBCluster** out_cluster ) {
 	CATCH_AND_RETURN(
 		*out_cluster = (FDBCluster*)
 			( (TSAV( char*, f )->get() ) ); );
 }

-extern "C" DLLEXPORT
 fdb_error_t fdb_future_get_database_v609( FDBFuture* f, FDBDatabase** out_database ) {
 	CATCH_AND_RETURN(
 		*out_database = (FDBDatabase*)
@@ -258,7 +251,6 @@ fdb_error_t fdb_future_get_value( FDBFuture* f, fdb_bool_t* out_present,
 		} );
 }

-extern "C"
 fdb_error_t fdb_future_get_keyvalue_array_impl(
 	FDBFuture* f, FDBKeyValue const** out_kv,
 	int* out_count, fdb_bool_t* out_more )
@@ -270,7 +262,6 @@ fdb_error_t fdb_future_get_keyvalue_array_impl(
 					  *out_more = rrr.more; );
 }

-extern "C"
 fdb_error_t fdb_future_get_keyvalue_array_v13(
 	FDBFuture* f, FDBKeyValue const** out_kv, int* out_count)
 {
@@ -280,7 +271,7 @@ fdb_error_t fdb_future_get_keyvalue_array_v13(
 					  *out_count = rrr.size(); );
 }

-extern "C"
+extern "C" DLLEXPORT
 FDBFuture* fdb_future_get_string_array(
 	FDBFuture* f, const char*** out_strings, int* out_count)
 {
@@ -291,7 +282,6 @@ fdb_error_t fdb_future_get_string_array(
 	);
 }

-extern "C" DLLEXPORT
 FDBFuture* fdb_create_cluster_v609( const char* cluster_file_path ) {
 	char *path;
 	if(cluster_file_path) {
@@ -305,7 +295,6 @@ FDBFuture* fdb_create_cluster_v609( const char* cluster_file_path ) {
 	return (FDBFuture*)ThreadFuture<char*>(path).extractPtr();
 }

-extern "C" DLLEXPORT
 fdb_error_t fdb_cluster_set_option_v609( FDBCluster* c,
 										 FDBClusterOption option,
 										 uint8_t const* value,
@@ -315,12 +304,19 @@ fdb_error_t fdb_cluster_set_option_v609( FDBCluster* c,
 	return error_code_success;
 }

-extern "C" DLLEXPORT
 void fdb_cluster_destroy_v609( FDBCluster* c ) {
 	CATCH_AND_DIE( delete[] CLUSTER(c); );
 }

-extern "C" DLLEXPORT
+// This exists so that fdb_cluster_create_database doesn't need to call the public symbol fdb_create_database.
+// If it does and this is an external client loaded through the multi-version API, then it may inadvertently call
+// the version of the function in the primary library if it was loaded into the global symbols.
+fdb_error_t fdb_create_database_impl( const char* cluster_file_path, FDBDatabase** out_database ) {
+	CATCH_AND_RETURN(
+		*out_database = (FDBDatabase*)API->createDatabase( cluster_file_path ? cluster_file_path : "" ).extractPtr();
+	);
+}
+
 FDBFuture* fdb_cluster_create_database_v609( FDBCluster* c, uint8_t const* db_name,
 											 int db_name_length )
 {
@@ -329,7 +325,7 @@ FDBFuture* fdb_cluster_create_database_v609( FDBCluster* c, uint8_t const* db_na
 	}

 	FDBDatabase *db;
-	fdb_error_t err = fdb_create_database(CLUSTER(c), &db);
+	fdb_error_t err = fdb_create_database_impl(CLUSTER(c), &db);
 	if(err) {
 		return (FDBFuture*)ThreadFuture<Reference<IDatabase>>(Error(err)).extractPtr();
 	}
@@ -339,9 +335,7 @@ FDBFuture* fdb_cluster_create_database_v609( FDBCluster* c, uint8_t const* db_na
 extern "C" DLLEXPORT
 fdb_error_t fdb_create_database( const char* cluster_file_path, FDBDatabase** out_database ) {
-	CATCH_AND_RETURN(
-		*out_database = (FDBDatabase*)API->createDatabase( cluster_file_path ? cluster_file_path : "" ).extractPtr();
-	);
+	return fdb_create_database_impl( cluster_file_path, out_database );
 }

 extern "C" DLLEXPORT
@@ -393,21 +387,18 @@ FDBFuture* fdb_transaction_get_read_version( FDBTransaction* tr ) {
 	return (FDBFuture*)( TXN(tr)->getReadVersion().extractPtr() );
 }

-extern "C"
 FDBFuture* fdb_transaction_get_impl( FDBTransaction* tr, uint8_t const* key_name,
 									 int key_name_length, fdb_bool_t snapshot ) {
 	return (FDBFuture*)
 		( TXN(tr)->get( KeyRef( key_name, key_name_length ), snapshot ).extractPtr() );
 }

-extern "C"
 FDBFuture* fdb_transaction_get_v13( FDBTransaction* tr, uint8_t const* key_name,
 									int key_name_length )
 {
 	return fdb_transaction_get_impl( tr, key_name, key_name_length, 0 );
 }

-extern "C"
 FDBFuture* fdb_transaction_get_key_impl( FDBTransaction* tr, uint8_t const* key_name,
 										 int key_name_length, fdb_bool_t or_equal,
 										 int offset, fdb_bool_t snapshot ) {
@@ -418,7 +409,6 @@ FDBFuture* fdb_transaction_get_key_impl( FDBTransaction* tr, uint8_t const* key_
 								  snapshot ).extractPtr() );
 }

-extern "C"
 FDBFuture* fdb_transaction_get_key_v13( FDBTransaction* tr, uint8_t const* key_name,
 										int key_name_length, fdb_bool_t or_equal,
 										int offset ) {
@@ -426,14 +416,13 @@ FDBFuture* fdb_transaction_get_key_v13( FDBTransaction* tr, uint8_t const* key_n
 									or_equal, offset, false );
 }

-extern "C"
+extern "C" DLLEXPORT
 FDBFuture* fdb_transaction_get_addresses_for_key( FDBTransaction* tr, uint8_t const* key_name,
 												  int key_name_length ){
 	return (FDBFuture*)( TXN(tr)->getAddressesForKey( KeyRef(key_name, key_name_length) ).extractPtr() );
 }

-extern "C"
 FDBFuture* fdb_transaction_get_range_impl(
 	FDBTransaction* tr, uint8_t const* begin_key_name,
 	int begin_key_name_length, fdb_bool_t begin_or_equal, int begin_offset,
@@ -504,7 +493,6 @@ FDBFuture* fdb_transaction_get_range_impl(
 							snapshot, reverse ).extractPtr() );
 }

-extern "C"
 FDBFuture* fdb_transaction_get_range_selector_v13(
 	FDBTransaction* tr, uint8_t const* begin_key_name, int begin_key_name_length,
 	fdb_bool_t begin_or_equal, int begin_offset, uint8_t const* end_key_name,
@@ -516,7 +504,6 @@ FDBFuture* fdb_transaction_get_range_selector_v13(
 		limit, 0, FDB_STREAMING_MODE_EXACT, 0, false, false);
 }

-extern "C"
 FDBFuture* fdb_transaction_get_range_v13(
 	FDBTransaction* tr, uint8_t const* begin_key_name, int begin_key_name_length,
 	uint8_t const* end_key_name, int end_key_name_length, int limit )
@@ -599,7 +586,6 @@ FDBFuture* fdb_transaction_get_versionstamp( FDBTransaction* tr )
 	return (FDBFuture*)(TXN(tr)->getVersionstamp().extractPtr());
 }

-extern "C"
 fdb_error_t fdb_transaction_set_option_impl( FDBTransaction* tr,
 											 FDBTransactionOption option,
 											 uint8_t const* value,
@@ -609,7 +595,6 @@ fdb_error_t fdb_transaction_set_option_impl( FDBTransaction* tr,
 		TXN(tr)->setOption( (FDBTransactionOptions::Option)option, value ? StringRef( value, value_length ) : Optional<StringRef>() ); );
 }

-extern "C"
 void fdb_transaction_set_option_v13( FDBTransaction* tr,
 									 FDBTransactionOption option )
 {
@@ -679,6 +664,10 @@ fdb_error_t fdb_select_api_version_impl( int runtime_version, int header_version
 	// Versioned API changes -- descending order by version (new changes at top)
 	// FDB_API_CHANGED( function, ver ) means there is a new implementation as of ver, and a function function_(ver-1) is the old implementation
 	// FDB_API_REMOVED( function, ver ) means the function was removed as of ver, and function_(ver-1) is the old implementation
+	//
+	// WARNING: use caution when implementing removed functions by calling public API functions. This can lead to undesired behavior when
+	// using the multi-version API. Instead, it is better to have both the removed and public functions call an internal implementation function.
+	// See fdb_create_database_impl for an example.
 	FDB_API_REMOVED( fdb_future_get_version, 620 );
 	FDB_API_REMOVED( fdb_create_cluster, 610 );
 	FDB_API_REMOVED( fdb_cluster_create_database, 610 );
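The warning above describes a linking pitfall specific to the multi-version client: if a versioned compatibility shim calls the library's own public symbol, the dynamic loader may resolve that call to the primary library's copy instead of the local one. A minimal sketch of the recommended pattern, using hypothetical names (fdb_widget, fdb_widget_v500, and do_widget_impl are illustrations, not part of the real fdb_c API):

	#include <cstdio>

	#define DLLEXPORT  // stand-in for the platform-specific export attribute

	// File-local implementation: calls to it resolve within this translation
	// unit, never through the process's global dynamic symbol table.
	static int do_widget_impl(int arg) {
		return arg * 2;
	}

	// Public symbol exported for current API versions.
	extern "C" DLLEXPORT int fdb_widget(int arg) {
		return do_widget_impl(arg);
	}

	// Compatibility shim selected for older API versions. It deliberately calls
	// do_widget_impl() rather than fdb_widget(), so a secondary client loaded by
	// the multi-version API cannot be rerouted to the primary library's copy.
	extern "C" int fdb_widget_v500(int arg) {
		return do_widget_impl(arg);
	}

	int main() {
		std::printf("%d %d\n", fdb_widget(21), fdb_widget_v500(21));
		return 0;
	}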

View File

@@ -294,6 +294,13 @@ extern "C" {
 	DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
 	fdb_cluster_create_database( FDBCluster* c, uint8_t const* db_name,
 								 int db_name_length );
+#else
+	#define fdb_future_get_cluster(f, oc) FDB_REMOVED_FUNCTION
+	#define fdb_future_get_database(f, od) FDB_REMOVED_FUNCTION
+	#define fdb_create_cluster(cfp) FDB_REMOVED_FUNCTION
+	#define fdb_cluster_destroy(c) FDB_REMOVED_FUNCTION
+	#define fdb_cluster_set_option(c, o, v, vl) FDB_REMOVED_FUNCTION
+	#define fdb_cluster_create_database(c, dn, dnl) FDB_REMOVED_FUNCTION
 #endif

 #if FDB_API_VERSION < 23
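These macros turn any use of a removed function into a compile-time error rather than a link-time or runtime surprise: FDB_REMOVED_FUNCTION is never declared anywhere, so every expansion of the macro fails to compile. A standalone sketch of the mechanism (my_removed_api and MY_REMOVED_FUNCTION are hypothetical stand-ins, not the real header):

	// MY_REMOVED_FUNCTION is intentionally left undeclared, mirroring the header.
	#define my_removed_api(x) MY_REMOVED_FUNCTION

	int main() {
		// Uncommenting the next line fails to compile with an
		// "undeclared identifier" error, which is the intended behavior:
		// my_removed_api(42);
		return 0;
	}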

View File

@@ -1,13 +1,13 @@
 FROM centos:6
-LABEL version=0.1.7
-ENV DOCKER_IMAGEVER=0.1.7
+LABEL version=0.1.8
+ENV DOCKER_IMAGEVER=0.1.8

 # Install dependencies for developer tools, bindings,\
 # documentation, actorcompiler, and packaging tools\
 RUN yum install -y yum-utils &&\
 	yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
 	yum -y install centos-release-scl epel-release &&\
-	yum -y install devtoolset-8 java-1.8.0-openjdk-devel \
+	yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
 		rh-python36-python-devel devtoolset-8-valgrind-devel \
 		mono-core rh-ruby24 golang python27 rpm-build debbuild \
 		python-pip npm dos2unix valgrind-devel ccache distcc &&\

View File

@@ -2,7 +2,7 @@ version: "3"
 services:
   common: &common
-    image: foundationdb/foundationdb-build:0.1.7
+    image: foundationdb/foundationdb-build:0.1.8

   build-setup: &build-setup
     <<: *common

View File

@@ -221,9 +221,14 @@ else()
   # Check whether we can use dtrace probes
   include(CheckSymbolExists)
   check_symbol_exists(DTRACE_PROBE sys/sdt.h SUPPORT_DTRACE)
+  check_symbol_exists(aligned_alloc stdlib.h HAS_ALIGNED_ALLOC)
+  message(STATUS "Has aligned_alloc: ${HAS_ALIGNED_ALLOC}")
   if(SUPPORT_DTRACE)
     add_compile_definitions(DTRACE_PROBES)
   endif()
+  if(HAS_ALIGNED_ALLOC)
+    add_compile_definitions(HAS_ALIGNED_ALLOC)
+  endif()

   if(CMAKE_COMPILER_IS_GNUCXX)
     set(USE_LTO OFF CACHE BOOL "Do link time optimization")
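For context, a feature macro like HAS_ALIGNED_ALLOC is typically consumed in C++ with a guarded fallback. The following is an illustrative sketch only (allocate_aligned is a hypothetical helper, not FoundationDB's actual code), assuming a POSIX fallback is acceptable when aligned_alloc is unavailable:

	#include <cstdlib>

	void* allocate_aligned(std::size_t alignment, std::size_t size) {
	#ifdef HAS_ALIGNED_ALLOC
		// C11/C++17 aligned_alloc: size must be a multiple of alignment.
		return aligned_alloc(alignment, size);
	#else
		// POSIX fallback: alignment must be a power of two and a multiple of sizeof(void*).
		void* p = nullptr;
		if (posix_memalign(&p, alignment, size) != 0) return nullptr;
		return p;
	#endif
	}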

View File

@@ -380,6 +380,24 @@ The ``list`` subcommand will list the backups at a given 'base' or shortened Bac
 	This is a shortened Backup URL which looks just like a Backup URL but without the backup <name> so that the list command will discover and list all of the backups in the bucket.

+.. program:: fdbbackup cleanup
+
+``cleanup``
+------------
+
+The ``cleanup`` subcommand will list orphaned backups and DRs and optionally remove their mutations.
+
+::
+
+   user@host$ fdbbackup cleanup [--delete_data] [--min_cleanup_seconds] [-C <CLUSTER_FILE>]
+
+``--delete_data``
+   This flag will cause ``cleanup`` to remove mutations for the most stale backup or DR.
+
+``--min_cleanup_seconds``
+   Specifies the amount of time a backup or DR needs to be stale before ``cleanup`` will remove mutations for it. By default this is set to one hour.
+
 ``fdbrestore`` command line tool
 ================================
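As a concrete, hypothetical example of the new subcommand, the following invocation would list stale tags and remove mutations for any tag at least two hours (7200 seconds) behind; the cluster file path here is illustrative::

   user@host$ fdbbackup cleanup --delete_data --min_cleanup_seconds 7200 -C /etc/foundationdb/fdb.cluster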

View File

@@ -599,7 +599,8 @@ Regions are configured in FoundationDB as a json document. For example::
        "datacenters":[{
            "id":"WC1",
            "priority":1,
-           "satellite":1
+           "satellite":1,
+           "satellite_logs":2
        }],
        "satellite_redundancy_mode":"one_satellite_double",
        "satellite_logs":2
@@ -659,7 +660,8 @@ This is the region configuration that implements the example::
    },{
        "id":"WC2",
        "priority":0,
-       "satellite":1
+       "satellite":1,
+       "satellite_logs":2
    }],
    "satellite_redundancy_mode":"one_satellite_double"
 },{
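To make the new per-satellite setting concrete, here is a minimal hypothetical snippet (not taken from the documentation itself): per the 6.2.6 change described in the release notes, the satellite datacenter's own ``satellite_logs`` (2) overrides the region-level ``satellite_logs`` (4) for that location::

    "datacenters":[{
        "id":"WC1",
        "priority":1
    },{
        "id":"WC2",
        "priority":0,
        "satellite":1,
        "satellite_logs":2
    }],
    "satellite_redundancy_mode":"one_satellite_double",
    "satellite_logs":4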

View File

@@ -10,38 +10,38 @@ macOS

 The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.

-* `FoundationDB-6.2.4.pkg <https://www.foundationdb.org/downloads/6.2.4/macOS/installers/FoundationDB-6.2.4.pkg>`_
+* `FoundationDB-6.2.6.pkg <https://www.foundationdb.org/downloads/6.2.6/macOS/installers/FoundationDB-6.2.6.pkg>`_

 Ubuntu
 ------

 The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.

-* `foundationdb-clients-6.2.4-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.4/ubuntu/installers/foundationdb-clients_6.2.4-1_amd64.deb>`_
-* `foundationdb-server-6.2.4-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.4/ubuntu/installers/foundationdb-server_6.2.4-1_amd64.deb>`_ (depends on the clients package)
+* `foundationdb-clients-6.2.6-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.6/ubuntu/installers/foundationdb-clients_6.2.6-1_amd64.deb>`_
+* `foundationdb-server-6.2.6-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.6/ubuntu/installers/foundationdb-server_6.2.6-1_amd64.deb>`_ (depends on the clients package)

 RHEL/CentOS EL6
 ---------------

 The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.

-* `foundationdb-clients-6.2.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.4/rhel6/installers/foundationdb-clients-6.2.4-1.el6.x86_64.rpm>`_
-* `foundationdb-server-6.2.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.4/rhel6/installers/foundationdb-server-6.2.4-1.el6.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.2.6-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.6/rhel6/installers/foundationdb-clients-6.2.6-1.el6.x86_64.rpm>`_
+* `foundationdb-server-6.2.6-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.6/rhel6/installers/foundationdb-server-6.2.6-1.el6.x86_64.rpm>`_ (depends on the clients package)

 RHEL/CentOS EL7
 ---------------

 The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.

-* `foundationdb-clients-6.2.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.4/rhel7/installers/foundationdb-clients-6.2.4-1.el7.x86_64.rpm>`_
-* `foundationdb-server-6.2.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.4/rhel7/installers/foundationdb-server-6.2.4-1.el7.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.2.6-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.6/rhel7/installers/foundationdb-clients-6.2.6-1.el7.x86_64.rpm>`_
+* `foundationdb-server-6.2.6-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.6/rhel7/installers/foundationdb-server-6.2.6-1.el7.x86_64.rpm>`_ (depends on the clients package)

 Windows
 -------

 The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.

-* `foundationdb-6.2.4-x64.msi <https://www.foundationdb.org/downloads/6.2.4/windows/installers/foundationdb-6.2.4-x64.msi>`_
+* `foundationdb-6.2.6-x64.msi <https://www.foundationdb.org/downloads/6.2.6/windows/installers/foundationdb-6.2.6-x64.msi>`_

 API Language Bindings
 =====================
@@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part

 If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:

-* `foundationdb-6.2.4.tar.gz <https://www.foundationdb.org/downloads/6.2.4/bindings/python/foundationdb-6.2.4.tar.gz>`_
+* `foundationdb-6.2.6.tar.gz <https://www.foundationdb.org/downloads/6.2.6/bindings/python/foundationdb-6.2.6.tar.gz>`_

 Ruby 1.9.3/2.0.0+
 -----------------

-* `fdb-6.2.4.gem <https://www.foundationdb.org/downloads/6.2.4/bindings/ruby/fdb-6.2.4.gem>`_
+* `fdb-6.2.6.gem <https://www.foundationdb.org/downloads/6.2.6/bindings/ruby/fdb-6.2.6.gem>`_

 Java 8+
 -------

-* `fdb-java-6.2.4.jar <https://www.foundationdb.org/downloads/6.2.4/bindings/java/fdb-java-6.2.4.jar>`_
-* `fdb-java-6.2.4-javadoc.jar <https://www.foundationdb.org/downloads/6.2.4/bindings/java/fdb-java-6.2.4-javadoc.jar>`_
+* `fdb-java-6.2.6.jar <https://www.foundationdb.org/downloads/6.2.6/bindings/java/fdb-java-6.2.6.jar>`_
+* `fdb-java-6.2.6-javadoc.jar <https://www.foundationdb.org/downloads/6.2.6/bindings/java/fdb-java-6.2.6-javadoc.jar>`_

 Go 1.11+
 --------

View File

@@ -498,7 +498,8 @@
            "datacenters":[{
               "id":"mr",
               "priority":1,
-              "satellite":1
+              "satellite":1,
+              "satellite_logs":2
            }],
            "satellite_redundancy_mode":{
               "$enum":[
@@ -577,6 +578,7 @@
        "max_machine_failures_without_losing_availability":0,
        "total_disk_used_bytes":0,
        "total_kv_size_bytes":0, // estimated
+       "system_kv_size_bytes":0, // estimated
        "partitions_count":2,
        "moving_data":{
           "total_written_bytes":0,

View File

@@ -2,6 +2,13 @@
 Release Notes
 #############

+6.1.13
+======
+
+* Loading a 6.1 or newer ``fdb_c`` library as a secondary client using the multi-version client could lead to an infinite recursion when run with API versions older than 610. `(PR #2169) <https://github.com/apple/foundationdb/pull/2169>`_.
+* Using C API functions that were removed in 6.1 when using API version 610 or above now results in a compilation error. `(PR #2169) <https://github.com/apple/foundationdb/pull/2169>`_.
+* ``fdbrestore`` commands other than ``start`` required a default cluster file to be found but did not actually use it. `(PR #1912) <https://github.com/apple/foundationdb/pull/1912>`_.
+
 6.1.12
 ======

View File

@@ -2,7 +2,7 @@
 Release Notes
 #############

-6.2.5
+6.2.6
 =====

 Performance
@@ -27,6 +27,7 @@ Performance
 * Log routers will prefer to peek from satellites at ``log_version >= 4``. `(PR #1795) <https://github.com/apple/foundationdb/pull/1795>`_.
 * In clusters using a region configuration, clients will read from the remote region if all of the servers in the primary region are overloaded. [6.2.3] `(PR #2019) <https://github.com/apple/foundationdb/pull/2019>`_.
 * Significantly improved the rate at which the transaction logs in a remote region can pull data from the primary region. [6.2.4] `(PR #2101) <https://github.com/apple/foundationdb/pull/2101>`_.
+* Raised the data distribution priority of splitting shards because delaying splits can cause hot write shards. [6.2.6] `(PR #2234) <https://github.com/apple/foundationdb/pull/2234>`_.

 Fixes
 -----
@@ -47,6 +48,16 @@ Fixes
 * Configuring regions would fail with an internal error if the cluster contained storage servers that didn't set a datacenter ID. `(PR #2017) <https://github.com/apple/foundationdb/pull/2017>`_.
 * Clients no longer prefer reading from servers with the same zone ID, because it could create hot shards. [6.2.3] `(PR #2019) <https://github.com/apple/foundationdb/pull/2019>`_.
 * Data distribution could fail to start if any storage servers had misconfigured locality information. This problem could persist even after the offending storage servers were removed or fixed. [6.2.5] `(PR #2110) <https://github.com/apple/foundationdb/pull/2110>`_.
+* Data distribution was running at too high a priority, which sometimes caused other roles on the same process to stall. [6.2.5] `(PR #2170) <https://github.com/apple/foundationdb/pull/2170>`_.
+* Loading a 6.1 or newer ``fdb_c`` library as a secondary client using the multi-version client could lead to an infinite recursion when run with API versions older than 610. [6.2.5] `(PR #2169) <https://github.com/apple/foundationdb/pull/2169>`_.
+* Using C API functions that were removed in 6.1 when using API version 610 or above now results in a compilation error. [6.2.5] `(PR #2169) <https://github.com/apple/foundationdb/pull/2169>`_.
+* Coordinator changes could fail to complete if the database wasn't allowing any transactions to start. [6.2.6] `(PR #2191) <https://github.com/apple/foundationdb/pull/2191>`_.
+* Status would report incorrect fault tolerance metrics when a remote region was configured and the primary region lost a storage replica. [6.2.6] `(PR #2230) <https://github.com/apple/foundationdb/pull/2230>`_.
+* The cluster would not change to a new set of satellite transaction logs when they became available in a better satellite location. [6.2.6] `(PR #2241) <https://github.com/apple/foundationdb/pull/2241>`_.
+* The existence of ``proxy`` or ``resolver`` class processes prevented ``stateless`` class processes from being recruited as proxies or resolvers. [6.2.6] `(PR #2241) <https://github.com/apple/foundationdb/pull/2241>`_.
+* Committing transactions larger than 1 MB could cause the proxy to stall for up to a second. [6.2.6] `(PR #2250) <https://github.com/apple/foundationdb/pull/2250>`_.
+* The cluster controller could become saturated in clusters with large numbers of connected clients using TLS. [6.2.6] `(PR #2252) <https://github.com/apple/foundationdb/pull/2252>`_.
+* Backup and DR would not share a mutation stream if they were started on different versions of FoundationDB. Either backup or DR must be restarted to resolve this issue. [6.2.6] `(PR #2202) <https://github.com/apple/foundationdb/pull/2202>`_.

 Status
 ------
@@ -64,6 +75,7 @@ Status
 * Add ``coordinator`` to the list of roles that can be reported for a process. [6.2.3] `(PR #2006) <https://github.com/apple/foundationdb/pull/2006>`_.
 * Added ``worst_durability_lag_storage_server`` and ``limiting_durability_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These report the durability lag values being used by ratekeeper to potentially limit the transaction rate. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
 * Added ``worst_data_lag_storage_server`` and ``limiting_data_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These are meant to replace ``worst_version_lag_storage_server`` and ``limiting_version_lag_storage_server``, which are now deprecated. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
+* Added ``system_kv_size_bytes`` to the ``cluster.data`` section to record the size of the system keyspace. [6.2.5] `(PR #2170) <https://github.com/apple/foundationdb/pull/2170>`_.

 Bindings
 --------
@@ -78,6 +90,12 @@ Bindings
 * Added a transaction option to control whether ``get_addresses_for_key`` includes a port in the address. This will be deprecated in api version 700, and addresses will include ports by default. [6.2.4] `(PR #2060) <https://github.com/apple/foundationdb/pull/2060>`_.
 * Python: ``Versionstamp`` comparisons didn't work in Python 3. [6.2.4] `(PR #2089) <https://github.com/apple/foundationdb/pull/2089>`_.

+Features
+--------
+
+* Added the ``cleanup`` command to ``fdbbackup`` which can be used to remove orphaned backups or DRs. [6.2.5] `(PR #2170) <https://github.com/apple/foundationdb/pull/2170>`_.
+* Added the ability to configure ``satellite_logs`` by satellite location. This will override the region-level configuration of ``satellite_logs`` if both are present. [6.2.6] `(PR #2241) <https://github.com/apple/foundationdb/pull/2241>`_.
+
 Other Changes
 -------------
@@ -112,6 +130,9 @@ Fixes only impacting 6.2.0+
 * The cluster controller would saturate its CPU for a few seconds when sending configuration information to all of the worker processes. [6.2.4] `(PR #2086) <https://github.com/apple/foundationdb/pull/2086>`_.
 * The data distributor would build all possible team combinations if it was tracking an unhealthy server with less than 10 teams. [6.2.4] `(PR #2099) <https://github.com/apple/foundationdb/pull/2099>`_.
 * The cluster controller could crash if a coordinator was unreachable when compiling cluster status. [6.2.4] `(PR #2065) <https://github.com/apple/foundationdb/pull/2065>`_.
+* A storage server could crash if it took longer than 10 minutes to fetch a key range from another server. [6.2.5] `(PR #2170) <https://github.com/apple/foundationdb/pull/2170>`_.
+* Excluding or including servers would restart the data distributor. [6.2.5] `(PR #2170) <https://github.com/apple/foundationdb/pull/2170>`_.
+* The data distributor could read invalid memory when estimating database size. [6.2.6] `(PR #2225) <https://github.com/apple/foundationdb/pull/2225>`_.

 Earlier release notes
 ---------------------

View File

@@ -77,7 +77,7 @@ enum enumProgramExe {
 };

 enum enumBackupType {
-	BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_MODIFY, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP
+	BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_MODIFY, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP, BACKUP_CLEANUP
 };

 enum enumDBType {
enum enumDBType { enum enumDBType {
@@ -95,7 +95,7 @@ enum {
 	OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS,
 	OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS,
 	OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS,
-	OPT_DUMP_BEGIN, OPT_DUMP_END, OPT_JSON,
+	OPT_DUMP_BEGIN, OPT_DUMP_END, OPT_JSON, OPT_DELETE_DATA, OPT_MIN_CLEANUP_SECONDS,

 	// Backup and Restore constants
 	OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE,
@@ -253,6 +253,7 @@ CSimpleOpt::SOption g_rgBackupStatusOptions[] = {
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
 	{ OPT_JSON, "--json", SO_NONE},
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -282,6 +283,37 @@ CSimpleOpt::SOption g_rgBackupAbortOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
+#ifndef TLS_DISABLED
+	TLS_OPTION_FLAGS
+#endif
+	SO_END_OF_OPTIONS
+};
+
+CSimpleOpt::SOption g_rgBackupCleanupOptions[] = {
+#ifdef _WIN32
+	{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
+#endif
+	{ OPT_CLUSTERFILE, "-C", SO_REQ_SEP },
+	{ OPT_CLUSTERFILE, "--cluster_file", SO_REQ_SEP },
+	{ OPT_TRACE, "--log", SO_NONE },
+	{ OPT_TRACE_DIR, "--logdir", SO_REQ_SEP },
+	{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
+	{ OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP },
+	{ OPT_QUIET, "-q", SO_NONE },
+	{ OPT_QUIET, "--quiet", SO_NONE },
+	{ OPT_VERSION, "--version", SO_NONE },
+	{ OPT_VERSION, "-v", SO_NONE },
+	{ OPT_CRASHONERROR, "--crash", SO_NONE },
+	{ OPT_MEMLIMIT, "-m", SO_REQ_SEP },
+	{ OPT_MEMLIMIT, "--memory", SO_REQ_SEP },
+	{ OPT_HELP, "-?", SO_NONE },
+	{ OPT_HELP, "-h", SO_NONE },
+	{ OPT_HELP, "--help", SO_NONE },
+	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
+	{ OPT_DELETE_DATA, "--delete_data", SO_NONE },
+	{ OPT_MIN_CLEANUP_SECONDS, "--min_cleanup_seconds", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -313,6 +345,7 @@ CSimpleOpt::SOption g_rgBackupDiscontinueOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -344,6 +377,7 @@ CSimpleOpt::SOption g_rgBackupWaitOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -371,6 +405,7 @@ CSimpleOpt::SOption g_rgBackupPauseOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -640,6 +675,7 @@ CSimpleOpt::SOption g_rgDBStartOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -673,6 +709,7 @@ CSimpleOpt::SOption g_rgDBStatusOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -705,6 +742,7 @@ CSimpleOpt::SOption g_rgDBSwitchOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -737,6 +775,7 @@ CSimpleOpt::SOption g_rgDBAbortOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -766,6 +805,7 @@ CSimpleOpt::SOption g_rgDBPauseOptions[] = {
 	{ OPT_HELP, "-h", SO_NONE },
 	{ OPT_HELP, "--help", SO_NONE },
 	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
 #ifndef TLS_DISABLED
 	TLS_OPTION_FLAGS
 #endif
@@ -1186,6 +1226,7 @@ enumBackupType getBackupType(std::string backupType)
 		values["start"] = BACKUP_START;
 		values["status"] = BACKUP_STATUS;
 		values["abort"] = BACKUP_ABORT;
+		values["cleanup"] = BACKUP_CLEANUP;
 		values["wait"] = BACKUP_WAIT;
 		values["discontinue"] = BACKUP_DISCONTINUE;
 		values["pause"] = BACKUP_PAUSE;
@@ -1863,6 +1904,21 @@ ACTOR Future<Void> abortBackup(Database db, std::string tagName) {
 	return Void();
 }

+ACTOR Future<Void> cleanupMutations(Database db, bool deleteData) {
+	try
+	{
+		wait(cleanupBackup(db, deleteData));
+	}
+	catch (Error& e) {
+		if(e.code() == error_code_actor_cancelled)
+			throw;
+		fprintf(stderr, "ERROR: %s\n", e.what());
+		throw;
+	}
+
+	return Void();
+}
+
 ACTOR Future<Void> waitBackup(Database db, std::string tagName, bool stopWhenDone) {
 	try
 	{
@@ -2540,6 +2596,9 @@ int main(int argc, char* argv[]) {
 		case BACKUP_ABORT:
 			args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupAbortOptions, SO_O_EXACT);
 			break;
+		case BACKUP_CLEANUP:
+			args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupCleanupOptions, SO_O_EXACT);
+			break;
 		case BACKUP_WAIT:
 			args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupWaitOptions, SO_O_EXACT);
 			break;
@@ -2712,6 +2771,7 @@ int main(int argc, char* argv[]) {
 	std::string restoreClusterFileDest;
 	std::string restoreClusterFileOrig;
 	bool jsonOutput = false;
+	bool deleteData = false;

 	BackupModifyOptions modifyOptions;
@@ -2791,6 +2851,12 @@ int main(int argc, char* argv[]) {
 			case OPT_DRYRUN:
 				dryRun = true;
 				break;
+			case OPT_DELETE_DATA:
+				deleteData = true;
+				break;
+			case OPT_MIN_CLEANUP_SECONDS:
+				knobs.push_back( std::make_pair( "min_cleanup_seconds", args->OptionArg() ) );
+				break;
 			case OPT_FORCE:
 				forceAction = true;
 				break;
@@ -3354,6 +3420,12 @@ int main(int argc, char* argv[]) {
 			f = stopAfter( abortBackup(db, tagName) );
 			break;

+		case BACKUP_CLEANUP:
+			if(!initCluster())
+				return FDB_EXIT_ERROR;
+			f = stopAfter( cleanupMutations(db, deleteData) );
+			break;
+
 		case BACKUP_WAIT:
 			if(!initCluster())
 				return FDB_EXIT_ERROR;

View File

@@ -485,7 +485,7 @@ bool copyParameter(Reference<Task> source, Reference<Task> dest, Key key);
 Version getVersionFromString(std::string const& value);
 Standalone<VectorRef<KeyRangeRef>> getLogRanges(Version beginVersion, Version endVersion, Key destUidValue, int blockSize = CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE);
 Standalone<VectorRef<KeyRangeRef>> getApplyRanges(Version beginVersion, Version endVersion, Key backupUid);
-Future<Void> eraseLogData(Database cx, Key logUidValue, Key destUidValue, Optional<Version> endVersion = Optional<Version>(), bool checkBackupUid = false, Version backupUid = 0);
+Future<Void> eraseLogData(Reference<ReadYourWritesTransaction> tr, Key logUidValue, Key destUidValue, Optional<Version> endVersion = Optional<Version>(), bool checkBackupUid = false, Version backupUid = 0);
 Key getApplyKey( Version version, Key backupUid );
 std::pair<uint64_t, uint32_t> decodeBKMutationLogKey(Key key);
 Standalone<VectorRef<MutationRef>> decodeBackupLogValue(StringRef value);
@@ -503,6 +503,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
 ACTOR Future<Void> applyMutations(Database cx, Key uid, Key addPrefix, Key removePrefix, Version beginVersion,
                                   Version* endVersion, RequestStream<CommitTransactionRequest> commit,
                                   NotifiedVersion* committedVersion, Reference<KeyRangeMap<Version>> keyVersion);
+ACTOR Future<Void> cleanupBackup(Database cx, bool deleteData);

 typedef BackupAgentBase::enumState EBackupState;
 template<> inline Tuple Codec<EBackupState>::pack(EBackupState const &val) { return Tuple().append(val); }

View File

@@ -708,7 +708,7 @@ ACTOR Future<Void> applyMutations(Database cx, Key uid, Key addPrefix, Key remov
 		}
 	}

-ACTOR static Future<Void> _eraseLogData(Database cx, Key logUidValue, Key destUidValue, Optional<Version> endVersion, bool checkBackupUid, Version backupUid) {
+ACTOR static Future<Void> _eraseLogData(Reference<ReadYourWritesTransaction> tr, Key logUidValue, Key destUidValue, Optional<Version> endVersion, bool checkBackupUid, Version backupUid) {
 	state Key backupLatestVersionsPath = destUidValue.withPrefix(backupLatestVersionsPrefix);
 	state Key backupLatestVersionsKey = logUidValue.withPrefix(backupLatestVersionsPath);
@ -716,104 +716,199 @@ ACTOR static Future<Void> _eraseLogData(Database cx, Key logUidValue, Key destUi
return Void(); return Void();
} }
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
if (checkBackupUid) {
Subspace sourceStates = Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceStates).get(logUidValue);
Optional<Value> v = wait( tr->get( sourceStates.pack(DatabaseBackupAgent::keyFolderId) ) );
if(v.present() && BinaryReader::fromStringRef<Version>(v.get(), Unversioned()) > backupUid)
return Void();
}
state Standalone<RangeResultRef> backupVersions = wait(tr->getRange(KeyRangeRef(backupLatestVersionsPath, strinc(backupLatestVersionsPath)), CLIENT_KNOBS->TOO_MANY));
// Make sure version history key does exist and lower the beginVersion if needed
state Version currBeginVersion = invalidVersion;
for (auto backupVersion : backupVersions) {
Key currLogUidValue = backupVersion.key.removePrefix(backupLatestVersionsPrefix).removePrefix(destUidValue);
if (currLogUidValue == logUidValue) {
currBeginVersion = BinaryReader::fromStringRef<Version>(backupVersion.value, Unversioned());
break;
}
}
// Do not clear anything if version history key cannot be found
if (currBeginVersion == invalidVersion) {
return Void();
}
state Version currEndVersion = std::numeric_limits<Version>::max();
if(endVersion.present()) {
currEndVersion = std::min(currEndVersion, endVersion.get());
}
state Version nextSmallestVersion = currEndVersion;
bool clearLogRangesRequired = true;
// More than one backup/DR with the same range
if (backupVersions.size() > 1) {
for (auto backupVersion : backupVersions) {
Key currLogUidValue = backupVersion.key.removePrefix(backupLatestVersionsPrefix).removePrefix(destUidValue);
Version currVersion = BinaryReader::fromStringRef<Version>(backupVersion.value, Unversioned());
if (currLogUidValue == logUidValue) {
continue;
} else if (currVersion > currBeginVersion) {
nextSmallestVersion = std::min(currVersion, nextSmallestVersion);
} else {
// If we can find a version less than or equal to beginVersion, clearing log ranges is not required
clearLogRangesRequired = false;
break;
}
}
}
if (endVersion.present() || backupVersions.size() != 1 || BUGGIFY) {
if (!endVersion.present()) {
// Clear current backup version history
tr->clear(backupLatestVersionsKey);
if(backupVersions.size() == 1) {
tr->clear(prefixRange(destUidValue.withPrefix(logRangesRange.begin)));
}
} else {
// Update current backup latest version
tr->set(backupLatestVersionsKey, BinaryWriter::toValue<Version>(currEndVersion, Unversioned()));
}
// Clear log ranges if needed
if (clearLogRangesRequired) {
if((nextSmallestVersion - currBeginVersion) / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE >= std::numeric_limits<uint8_t>::max() || BUGGIFY) {
Key baLogRangePrefix = destUidValue.withPrefix(backupLogKeys.begin);
for(int h = 0; h <= std::numeric_limits<uint8_t>::max(); h++) {
uint64_t bv = bigEndian64(Version(0));
uint64_t ev = bigEndian64(nextSmallestVersion);
uint8_t h1 = h;
Key vblockPrefix = StringRef(&h1, sizeof(uint8_t)).withPrefix(baLogRangePrefix);
tr->clear(KeyRangeRef(StringRef((uint8_t*)&bv, sizeof(uint64_t)).withPrefix(vblockPrefix),
StringRef((uint8_t*)&ev, sizeof(uint64_t)).withPrefix(vblockPrefix)));
}
} else {
Standalone<VectorRef<KeyRangeRef>> ranges = getLogRanges(currBeginVersion, nextSmallestVersion, destUidValue);
for (auto& range : ranges) {
tr->clear(range);
}
}
}
} else {
// Clear version history
tr->clear(prefixRange(backupLatestVersionsPath));
// Clear everything under blog/[destUid]
tr->clear(prefixRange(destUidValue.withPrefix(backupLogKeys.begin)));
// Disable committing mutations into blog
tr->clear(prefixRange(destUidValue.withPrefix(logRangesRange.begin)));
}
if(!endVersion.present() && backupVersions.size() == 1) {
Standalone<RangeResultRef> existingDestUidValues = wait(tr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY));
for(auto it : existingDestUidValues) {
if( it.value == destUidValue ) {
tr->clear(it.key);
}
}
}
return Void();
}
Future<Void> eraseLogData(Reference<ReadYourWritesTransaction> tr, Key logUidValue, Key destUidValue, Optional<Version> endVersion, bool checkBackupUid, Version backupUid) {
return _eraseLogData(tr, logUidValue, destUidValue, endVersion, checkBackupUid, backupUid);
}
ACTOR Future<Void> cleanupLogMutations(Database cx, Value destUidValue, bool deleteData) {
state Key backupLatestVersionsPath = destUidValue.withPrefix(backupLatestVersionsPrefix);
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx)); state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
loop{ state Optional<Key> removingLogUid;
state std::set<Key> loggedLogUids;
loop {
try { try {
tr->setOption(FDBTransactionOptions::LOCK_AWARE); tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
if (checkBackupUid) {
Subspace sourceStates = Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceStates).get(logUidValue);
Optional<Value> v = wait( tr->get( sourceStates.pack(DatabaseBackupAgent::keyFolderId) ) );
if(v.present() && BinaryReader::fromStringRef<Version>(v.get(), Unversioned()) > backupUid)
return Void();
}
 			state Standalone<RangeResultRef> backupVersions = wait(tr->getRange(KeyRangeRef(backupLatestVersionsPath, strinc(backupLatestVersionsPath)), CLIENT_KNOBS->TOO_MANY));
+			state Version readVer = tr->getReadVersion().get();

-			// Make sure version history key does exist and lower the beginVersion if needed
-			state Version currBeginVersion = invalidVersion;
-			for (auto backupVersion : backupVersions) {
-				Key currLogUidValue = backupVersion.key.removePrefix(backupLatestVersionsPrefix).removePrefix(destUidValue);
-				if (currLogUidValue == logUidValue) {
-					currBeginVersion = BinaryReader::fromStringRef<Version>(backupVersion.value, Unversioned());
-					break;
-				}
-			}
-
-			// Do not clear anything if version history key cannot be found
-			if (currBeginVersion == invalidVersion) {
-				return Void();
-			}
-
-			state Version currEndVersion = currBeginVersion + CLIENT_KNOBS->CLEAR_LOG_RANGE_COUNT * CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE;
-			if(endVersion.present()) {
-				currEndVersion = std::min(currEndVersion, endVersion.get());
-			}
-
-			state Version nextSmallestVersion = currEndVersion;
-			bool clearLogRangesRequired = true;
-
-			// More than one backup/DR with the same range
-			if (backupVersions.size() > 1) {
-				for (auto backupVersion : backupVersions) {
-					Key currLogUidValue = backupVersion.key.removePrefix(backupLatestVersionsPrefix).removePrefix(destUidValue);
-					Version currVersion = BinaryReader::fromStringRef<Version>(backupVersion.value, Unversioned());
-					if (currLogUidValue == logUidValue) {
-						continue;
-					} else if (currVersion > currBeginVersion) {
-						nextSmallestVersion = std::min(currVersion, nextSmallestVersion);
-					} else {
-						// If we can find a version less than or equal to beginVersion, clearing log ranges is not required
-						clearLogRangesRequired = false;
-						break;
-					}
-				}
-			}
-
-			if (!endVersion.present() && backupVersions.size() == 1) {
-				// Clear version history
-				tr->clear(prefixRange(backupLatestVersionsPath));
-
-				// Clear everything under blog/[destUid]
-				tr->clear(prefixRange(destUidValue.withPrefix(backupLogKeys.begin)));
-
-				// Disable committing mutations into blog
-				tr->clear(prefixRange(destUidValue.withPrefix(logRangesRange.begin)));
-			} else {
-				if (!endVersion.present() && currEndVersion >= nextSmallestVersion) {
-					// Clear current backup version history
-					tr->clear(backupLatestVersionsKey);
-				} else {
-					// Update current backup latest version
-					tr->set(backupLatestVersionsKey, BinaryWriter::toValue<Version>(currEndVersion, Unversioned()));
-				}
-
-				// Clear log ranges if needed
-				if (clearLogRangesRequired) {
-					Standalone<VectorRef<KeyRangeRef>> ranges = getLogRanges(currBeginVersion, nextSmallestVersion, destUidValue);
-					for (auto& range : ranges) {
-						tr->clear(range);
-					}
-				}
-			}
-
-			wait(tr->commit());
-
-			if (!endVersion.present() && (backupVersions.size() == 1 || currEndVersion >= nextSmallestVersion)) {
-				return Void();
-			}
-			if(endVersion.present() && currEndVersion == endVersion.get()) {
-				return Void();
-			}
-			tr->reset();
-		} catch (Error &e) {
+			state Version minVersion = std::numeric_limits<Version>::max();
+			state Key minVersionLogUid;
+
+			state int backupIdx = 0;
+			for (; backupIdx < backupVersions.size(); backupIdx++) {
+				state Version currVersion = BinaryReader::fromStringRef<Version>(backupVersions[backupIdx].value, Unversioned());
+				state Key currLogUid = backupVersions[backupIdx].key.removePrefix(backupLatestVersionsPrefix).removePrefix(destUidValue);
+				if( currVersion < minVersion ) {
+					minVersionLogUid = currLogUid;
+					minVersion = currVersion;
+				}
+
+				if(!loggedLogUids.count(currLogUid)) {
+					state Future<Optional<Value>> foundDRKey = tr->get(Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceStates).get(currLogUid).pack(DatabaseBackupAgent::keyStateStatus));
+					state Future<Optional<Value>> foundBackupKey = tr->get(Subspace(currLogUid.withPrefix(LiteralStringRef("uid->config/")).withPrefix(fileBackupPrefixRange.begin)).pack(LiteralStringRef("stateEnum")));
+					wait(success(foundDRKey) && success(foundBackupKey));
+
+					if(foundDRKey.get().present() && foundBackupKey.get().present()) {
+						printf("WARNING: Found a tag which looks like both a backup and a DR. This tag was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND));
+					} else if(foundDRKey.get().present() && !foundBackupKey.get().present()) {
+						printf("Found a DR which was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND));
+					} else if(!foundDRKey.get().present() && foundBackupKey.get().present()) {
+						printf("Found a Backup which was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND));
+					} else {
+						printf("WARNING: Found an unknown tag which was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND));
+					}
+					loggedLogUids.insert(currLogUid);
+				}
+			}
+
+			if( readVer - minVersion > CLIENT_KNOBS->MIN_CLEANUP_SECONDS*CLIENT_KNOBS->CORE_VERSIONSPERSECOND && deleteData && (!removingLogUid.present() || minVersionLogUid == removingLogUid.get()) ) {
+				removingLogUid = minVersionLogUid;
+				wait(eraseLogData(tr, minVersionLogUid, destUidValue));
+				wait(tr->commit());
+				printf("\nSuccessfully removed the tag which was %.4f hours behind.\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND));
+			} else if(removingLogUid.present() && minVersionLogUid != removingLogUid.get()) {
+				printf("\nWARNING: The oldest tag was possibly removed, run again without `--delete_data' to check.\n");
+			} else if( deleteData ) {
+				printf("\nWARNING: Did not delete data because the tag was not at least %.4f hours behind. Change `--min_cleanup_seconds' to adjust this threshold.\n", CLIENT_KNOBS->MIN_CLEANUP_SECONDS/3600.0);
+			} else {
+				printf("\nPassing `--delete_data' would delete the tag which was %.4f hours behind.\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND));
+			}
+
+			return Void();
+		} catch( Error& e) {
 			wait(tr->onError(e));
 		}
 	}
 }

-Future<Void> eraseLogData(Database cx, Key logUidValue, Key destUidValue, Optional<Version> endVersion, bool checkBackupUid, Version backupUid) {
-	return _eraseLogData(cx, logUidValue, destUidValue, endVersion, checkBackupUid, backupUid);
-}
+ACTOR Future<Void> cleanupBackup(Database cx, bool deleteData) {
+	state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
+	loop {
+		try {
+			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
+			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+
+			state Standalone<RangeResultRef> destUids = wait(tr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY));
+
+			for(auto destUid : destUids) {
+				wait(cleanupLogMutations(cx, destUid.value, deleteData));
+			}
+			return Void();
+		} catch( Error& e) {
+			wait(tr->onError(e));
+		}
+	}
+}
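
A note on the arithmetic in the messages above: FoundationDB commit versions advance at CLIENT_KNOBS->CORE_VERSIONSPERSECOND (one million per second by default), so the cleanup code converts a version lag into wall-clock hours by dividing by 3600 times that rate, and compares the raw lag against MIN_CLEANUP_SECONDS worth of versions before deleting anything. A minimal standalone sketch of that conversion (the helper name and sample numbers are hypothetical, not part of the commit):

    #include <cstdint>
    #include <cstdio>

    // Version lag -> hours, assuming versions advance at versionsPerSecond.
    static double versionLagToHours(int64_t readVersion, int64_t tagVersion,
                                    double versionsPerSecond = 1e6) {
        return (readVersion - tagVersion) / (3600.0 * versionsPerSecond);
    }

    int main() {
        // A tag 7,200,000,000 versions behind the read version is ~2 hours
        // behind, which clears the default MIN_CLEANUP_SECONDS of 3600 seconds.
        std::printf("%.4f hours behind\n",
                    versionLagToHours(10000000000LL, 2800000000LL));
        return 0;
    }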

View File

@@ -482,11 +482,17 @@ namespace dbBackup {
 			wait(checkTaskVersion(cx, task, EraseLogRangeTaskFunc::name, EraseLogRangeTaskFunc::version));

-			Version endVersion = BinaryReader::fromStringRef<Version>(task->params[DatabaseBackupAgent::keyEndVersion], Unversioned());
-			wait(eraseLogData(taskBucket->src, task->params[BackupAgentBase::keyConfigLogUid], task->params[BackupAgentBase::destUid], Optional<Version>(endVersion), true, BinaryReader::fromStringRef<Version>(task->params[BackupAgentBase::keyFolderId], Unversioned())));
-			return Void();
+			state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(taskBucket->src));
+			loop {
+				try {
+					Version endVersion = BinaryReader::fromStringRef<Version>(task->params[DatabaseBackupAgent::keyEndVersion], Unversioned());
+					wait(eraseLogData(tr, task->params[BackupAgentBase::keyConfigLogUid], task->params[BackupAgentBase::destUid], Optional<Version>(endVersion), true, BinaryReader::fromStringRef<Version>(task->params[BackupAgentBase::keyFolderId], Unversioned())));
+					wait(tr->commit());
+					return Void();
+				} catch( Error &e ) {
+					wait(tr->onError(e));
+				}
+			}
 		}

 		ACTOR static Future<Key> addTask(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket, Reference<Task> parentTask, Version endVersion, TaskCompletionKey completionKey, Reference<TaskFuture> waitFor = Reference<TaskFuture>()) {
@@ -833,8 +839,7 @@ namespace dbBackup {
 			state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(taskBucket->src));
 			state Key logUidValue = task->params[DatabaseBackupAgent::keyConfigLogUid];
 			state Key destUidValue = task->params[BackupAgentBase::destUid];
-			state Version beginVersion;
-			state Version endVersion;
+			state Version backupUid = BinaryReader::fromStringRef<Version>(task->params[BackupAgentBase::keyFolderId], Unversioned());

 			loop {
 				try {
@@ -844,25 +849,13 @@ namespace dbBackup {
 					if(v.present() && BinaryReader::fromStringRef<Version>(v.get(), Unversioned()) > BinaryReader::fromStringRef<Version>(task->params[DatabaseBackupAgent::keyFolderId], Unversioned()))
 						return Void();

-					state Key latestVersionKey = logUidValue.withPrefix(task->params[BackupAgentBase::destUid].withPrefix(backupLatestVersionsPrefix));
-					state Optional<Key> bVersion = wait(tr->get(latestVersionKey));
-
-					if (!bVersion.present()) {
-						return Void();
-					}
-					beginVersion = BinaryReader::fromStringRef<Version>(bVersion.get(), Unversioned());
-
-					endVersion = tr->getReadVersion().get();
-					break;
+					wait(eraseLogData(tr, logUidValue, destUidValue, Optional<Version>(), true, backupUid));
+					wait(tr->commit());
+					return Void();
 				} catch(Error &e) {
 					wait(tr->onError(e));
 				}
 			}
-
-			Version backupUid = BinaryReader::fromStringRef<Version>(task->params[BackupAgentBase::keyFolderId], Unversioned());
-			wait(eraseLogData(taskBucket->src, logUidValue, destUidValue, Optional<Version>(), true, backupUid));
-
-			return Void();
 		}

 		ACTOR static Future<Key> addTask(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket, Reference<Task> parentTask, TaskCompletionKey completionKey, Reference<TaskFuture> waitFor = Reference<TaskFuture>()) {
@@ -1303,19 +1296,25 @@ namespace dbBackup {
 			}

 			if (backupRanges.size() == 1) {
-				state Key destUidLookupPath = BinaryWriter::toValue(backupRanges[0], IncludeVersion()).withPrefix(destUidLookupPrefix);
-				Optional<Key> existingDestUidValue = wait(srcTr->get(destUidLookupPath));
-				if (existingDestUidValue.present()) {
-					if (destUidValue == existingDestUidValue.get()) {
-						// due to unknown commit result
-						break;
-					} else {
-						// existing backup/DR is running
-						return Void();
+				Standalone<RangeResultRef> existingDestUidValues = wait(srcTr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY));
+				bool found = false;
+				for(auto it : existingDestUidValues) {
+					if( BinaryReader::fromStringRef<KeyRange>(it.key.removePrefix(destUidLookupPrefix), IncludeVersion()) == backupRanges[0] ) {
+						if(destUidValue != it.value) {
+							// existing backup/DR is running
+							return Void();
+						} else {
+							// due to unknown commit result
+							found = true;
+							break;
+						}
 					}
 				}
+				if(found) {
+					break;
+				}

-				srcTr->set(destUidLookupPath, destUidValue);
+				srcTr->set(BinaryWriter::toValue(backupRanges[0], IncludeVersion(ProtocolVersion::withSharedMutations())).withPrefix(destUidLookupPrefix), destUidValue);
 			}

 			Key versionKey = logUidValue.withPrefix(destUidValue).withPrefix(backupLatestVersionsPrefix);
@@ -1473,13 +1472,18 @@ namespace dbBackup {
 				// Initialize destUid
 				if (backupRanges.size() == 1) {
-					state Key destUidLookupPath = BinaryWriter::toValue(backupRanges[0], IncludeVersion()).withPrefix(destUidLookupPrefix);
-					Optional<Key> existingDestUidValue = wait(srcTr->get(destUidLookupPath));
-					if (existingDestUidValue.present()) {
-						destUidValue = existingDestUidValue.get();
-					} else {
+					Standalone<RangeResultRef> existingDestUidValues = wait(srcTr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY));
+					bool found = false;
+					for(auto it : existingDestUidValues) {
+						if( BinaryReader::fromStringRef<KeyRange>(it.key.removePrefix(destUidLookupPrefix), IncludeVersion()) == backupRanges[0] ) {
+							destUidValue = it.value;
+							found = true;
+							break;
+						}
+					}
+					if( !found ) {
 						destUidValue = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(), Unversioned());
-						srcTr->set(destUidLookupPath, destUidValue);
+						srcTr->set(BinaryWriter::toValue(backupRanges[0], IncludeVersion(ProtocolVersion::withSharedMutations())).withPrefix(destUidLookupPrefix), destUidValue);
 					}
 				}
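
Both DatabaseBackupAgent hunks above replace a point read of destUidLookupPath with a scan of everything under destUidLookupPrefix, decoding each key back into a KeyRange before comparing. The apparent reason is visible on the write side: the lookup key embeds a serialization protocol version, and the new code pins it with IncludeVersion(ProtocolVersion::withSharedMutations()), so entries written under different protocol versions can carry different key bytes for the same range, and only a decode-and-compare scan finds them all. A standalone sketch of the scan-and-decode pattern, with a std::map standing in for the keyspace and a toy "version/range" key encoding (all names hypothetical):

    #include <map>
    #include <optional>
    #include <string>

    // decodeRange() stands in for BinaryReader::fromStringRef<KeyRange>(..., IncludeVersion()):
    // the stored key embeds a protocol version, so raw key bytes are not comparable.
    static std::string decodeRange(const std::string& encoded) {
        return encoded.substr(encoded.find('/') + 1); // toy "v13/range" encoding
    }

    static std::optional<std::string> findDestUid(const std::map<std::string, std::string>& kv,
                                                  const std::string& prefix,
                                                  const std::string& wantedRange) {
        for (auto it = kv.lower_bound(prefix);
             it != kv.end() && it->first.compare(0, prefix.size(), prefix) == 0; ++it) {
            if (decodeRange(it->first.substr(prefix.size())) == wantedRange)
                return it->second; // decoded range matches even if key bytes differ
        }
        return std::nullopt;
    }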
@@ -2179,22 +2183,23 @@ public:
 			}
 		}

-		if(partial)
-			return Void();
+		state Future<Void> partialTimeout = partial ? delay(30.0) : Never();

 		state Reference<ReadYourWritesTransaction> srcTr(new ReadYourWritesTransaction(backupAgent->taskBucket->src));
 		state Version beginVersion;
 		state Version endVersion;
-		state bool clearSrcDb = true;

 		loop {
 			try {
 				srcTr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
 				srcTr->setOption(FDBTransactionOptions::LOCK_AWARE);

-				Optional<Value> v = wait( srcTr->get( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId) ) );
+				state Future<Optional<Value>> backupVersionF = srcTr->get( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId) );
+				wait(success(backupVersionF) || partialTimeout);
+				if(partialTimeout.isReady()) {
+					return Void();
+				}

-				if(v.present() && BinaryReader::fromStringRef<Version>(v.get(), Unversioned()) > BinaryReader::fromStringRef<Version>(backupUid, Unversioned())) {
-					clearSrcDb = false;
+				if(backupVersionF.get().present() && BinaryReader::fromStringRef<Version>(backupVersionF.get().get(), Unversioned()) > BinaryReader::fromStringRef<Version>(backupUid, Unversioned())) {
 					break;
 				}
@@ -2208,18 +2213,31 @@ public:
 				Key latestVersionKey = logUidValue.withPrefix(destUidValue.withPrefix(backupLatestVersionsPrefix));
-				Optional<Key> bVersion = wait(srcTr->get(latestVersionKey));

-				if (bVersion.present()) {
-					beginVersion = BinaryReader::fromStringRef<Version>(bVersion.get(), Unversioned());
+				state Future<Optional<Key>> bVersionF = srcTr->get(latestVersionKey);
+				wait(success(bVersionF) || partialTimeout);
+				if(partialTimeout.isReady()) {
+					return Void();
+				}
+
+				if (bVersionF.get().present()) {
+					beginVersion = BinaryReader::fromStringRef<Version>(bVersionF.get().get(), Unversioned());
 				} else {
-					clearSrcDb = false;
 					break;
 				}

 				srcTr->set( backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_PARTIALLY_ABORTED) ));
 				srcTr->set( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid );

-				wait(srcTr->commit());
+				wait( eraseLogData(srcTr, logUidValue, destUidValue) || partialTimeout );
+				if(partialTimeout.isReady()) {
+					return Void();
+				}
+
+				wait(srcTr->commit() || partialTimeout);
+				if(partialTimeout.isReady()) {
+					return Void();
+				}
+
 				endVersion = srcTr->getCommittedVersion() + 1;

 				break;
@@ -2229,10 +2247,6 @@ public:
 			}
 		}

-		if (clearSrcDb && !abortOldBackup) {
-			wait(eraseLogData(backupAgent->taskBucket->src, logUidValue, destUidValue));
-		}
-
 		tr = Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(cx));
 		loop {
 			try {
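
The shape of this change is worth noting: every blocking step of a partial abort now races against one shared 30-second timeout future and returns early whenever the timeout wins, so an unreachable source cluster can no longer hang the caller indefinitely. A rough standalone analogue using std::future in place of Flow futures (the names and deadline value are illustrative only, not the commit's API):

    #include <chrono>
    #include <future>
    #include <optional>

    // Wait for f or for the shared deadline, whichever comes first.
    // Returns nullopt when the deadline wins, mirroring the early `return Void()`.
    template <class T>
    std::optional<T> awaitOrDeadline(std::future<T>& f,
                                     std::chrono::steady_clock::time_point deadline) {
        if (f.wait_until(deadline) == std::future_status::ready)
            return f.get();
        return std::nullopt;
    }

Each step checks the result and bails out on nullopt, which is the same early-exit discipline the diff applies after every wait on partialTimeout.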

View File

@@ -73,6 +73,7 @@ void parse( std::vector<RegionInfo>* regions, ValueRef const& v ) {
 				s.get("id", idStr);
 				satInfo.dcId = idStr;
 				s.get("priority", satInfo.priority);
+				s.tryGet("satellite_logs", satInfo.satelliteDesiredTLogCount);
 				info.satellites.push_back(satInfo);
 			} else {
 				if (foundNonSatelliteDatacenter) throw invalid_option();
@@ -365,6 +366,9 @@ StatusArray DatabaseConfiguration::getRegionJSON() const {
 				satObj["id"] = s.dcId.toString();
 				satObj["priority"] = s.priority;
 				satObj["satellite"] = 1;
+				if(s.satelliteDesiredTLogCount != -1) {
+					satObj["satellite_logs"] = s.satelliteDesiredTLogCount;
+				}

 				dcArr.push_back(satObj);
 			}

View File

@@ -32,6 +32,7 @@
 struct SatelliteInfo {
 	Key dcId;
 	int32_t priority;
+	int32_t satelliteDesiredTLogCount = -1;

 	SatelliteInfo() : priority(0) {}
@@ -41,7 +42,7 @@ struct SatelliteInfo {
 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, dcId, priority);
+		serializer(ar, dcId, priority, satelliteDesiredTLogCount);
 	}
 };

View File

@@ -1988,6 +1988,7 @@ namespace fileBackup {
 	const uint32_t BackupLogRangeTaskFunc::version = 1;
 	REGISTER_TASKFUNC(BackupLogRangeTaskFunc);

+	//This task stopped being used in 6.2, however the code remains here to handle upgrades.
 	struct EraseLogRangeTaskFunc : BackupTaskFuncBase {
 		static StringRef name;
 		static const uint32_t version;
@@ -2005,21 +2006,6 @@ namespace fileBackup {
 			}
 		} Params;

-		ACTOR static Future<Void> _execute(Database cx, Reference<TaskBucket> taskBucket, Reference<FutureBucket> futureBucket, Reference<Task> task) {
-			state Reference<FlowLock> lock(new FlowLock(CLIENT_KNOBS->BACKUP_LOCK_BYTES));
-			wait(checkTaskVersion(cx, task, EraseLogRangeTaskFunc::name, EraseLogRangeTaskFunc::version));
-			state Version endVersion = Params.endVersion().get(task);
-			state Key destUidValue = Params.destUidValue().get(task);
-			state BackupConfig config(task);
-			state Key logUidValue = config.getUidAsKey();
-			wait(eraseLogData(cx, logUidValue, destUidValue, endVersion != 0 ? Optional<Version>(endVersion) : Optional<Version>()));
-			return Void();
-		}
-
 		ACTOR static Future<Key> addTask(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket, UID logUid, TaskCompletionKey completionKey, Key destUidValue, Version endVersion = 0, Reference<TaskFuture> waitFor = Reference<TaskFuture>()) {
 			Key key = wait(addBackupTask(EraseLogRangeTaskFunc::name,
 			                             EraseLogRangeTaskFunc::version,
@@ -2036,16 +2022,23 @@ namespace fileBackup {
 			return key;
 		}

 		ACTOR static Future<Void> _finish(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket, Reference<FutureBucket> futureBucket, Reference<Task> task) {
 			state Reference<TaskFuture> taskFuture = futureBucket->unpack(task->params[Task::reservedTaskParamKeyDone]);
-			wait(taskFuture->set(tr, taskBucket) && taskBucket->finish(tr, task));
+			wait(checkTaskVersion(tr->getDatabase(), task, EraseLogRangeTaskFunc::name, EraseLogRangeTaskFunc::version));
+
+			state Version endVersion = Params.endVersion().get(task);
+			state Key destUidValue = Params.destUidValue().get(task);
+
+			state BackupConfig config(task);
+			state Key logUidValue = config.getUidAsKey();
+
+			wait(taskFuture->set(tr, taskBucket) && taskBucket->finish(tr, task) && eraseLogData(tr, logUidValue, destUidValue, endVersion != 0 ? Optional<Version>(endVersion) : Optional<Version>()));

 			return Void();
 		}

-		Future<Void> execute(Database cx, Reference<TaskBucket> tb, Reference<FutureBucket> fb, Reference<Task> task) { return _execute(cx, tb, fb, task); };
+		Future<Void> execute(Database cx, Reference<TaskBucket> tb, Reference<FutureBucket> fb, Reference<Task> task) { return Void(); };
 		Future<Void> finish(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> tb, Reference<FutureBucket> fb, Reference<Task> task) { return _finish(tr, tb, fb, task); };
 	};
 	StringRef EraseLogRangeTaskFunc::name = LiteralStringRef("file_backup_erase_logs_5.2");
@@ -2132,7 +2125,7 @@ namespace fileBackup {
 			// Do not erase at the first time
 			if (prevBeginVersion > 0) {
 				state Key destUidValue = wait(config.destUidValue().getOrThrow(tr));
-				wait(success(EraseLogRangeTaskFunc::addTask(tr, taskBucket, config.getUid(), TaskCompletionKey::joinWith(logDispatchBatchFuture), destUidValue, beginVersion)));
+				wait( eraseLogData(tr, config.getUidAsKey(), destUidValue, Optional<Version>(beginVersion)) );
 			}

 			wait(taskBucket->finish(tr, task));
@@ -2183,7 +2176,7 @@ namespace fileBackup {
 			tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY);
 			state Key destUidValue = wait(backup.destUidValue().getOrThrow(tr));
-			wait(success(EraseLogRangeTaskFunc::addTask(tr, taskBucket, backup.getUid(), TaskCompletionKey::noSignal(), destUidValue)));
+			wait( eraseLogData(tr, backup.getUidAsKey(), destUidValue) );

 			backup.stateEnum().set(tr, EBackupState::STATE_COMPLETED);
@@ -3626,13 +3619,18 @@ public:
 		state Key destUidValue(BinaryWriter::toValue(uid, Unversioned()));
 		if (normalizedRanges.size() == 1) {
-			state Key destUidLookupPath = BinaryWriter::toValue(normalizedRanges[0], IncludeVersion()).withPrefix(destUidLookupPrefix);
-			Optional<Key> existingDestUidValue = wait(tr->get(destUidLookupPath));
-			if (existingDestUidValue.present()) {
-				destUidValue = existingDestUidValue.get();
-			} else {
+			Standalone<RangeResultRef> existingDestUidValues = wait(tr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY));
+			bool found = false;
+			for(auto it : existingDestUidValues) {
+				if( BinaryReader::fromStringRef<KeyRange>(it.key.removePrefix(destUidLookupPrefix), IncludeVersion()) == normalizedRanges[0] ) {
+					destUidValue = it.value;
+					found = true;
+					break;
+				}
+			}
+			if( !found ) {
 				destUidValue = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(), Unversioned());
-				tr->set(destUidLookupPath, destUidValue);
+				tr->set(BinaryWriter::toValue(normalizedRanges[0], IncludeVersion(ProtocolVersion::withSharedMutations())).withPrefix(destUidLookupPrefix), destUidValue);
 			}
 		}
@@ -3820,8 +3818,7 @@ public:
 		state Key destUidValue = wait(config.destUidValue().getOrThrow(tr));
 		wait(success(tr->getReadVersion()));

-		wait(success(fileBackup::EraseLogRangeTaskFunc::addTask(tr, backupAgent->taskBucket, config.getUid(), TaskCompletionKey::noSignal(), destUidValue)));
+		wait( eraseLogData(tr, config.getUidAsKey(), destUidValue) );

 		config.stateEnum().set(tr, EBackupState::STATE_COMPLETED);
@@ -3861,7 +3858,7 @@ public:
 		// Cancel backup task through tag
 		wait(tag.cancel(tr));

-		wait(success(fileBackup::EraseLogRangeTaskFunc::addTask(tr, backupAgent->taskBucket, config.getUid(), TaskCompletionKey::noSignal(), destUidValue)));
+		wait(eraseLogData(tr, config.getUidAsKey(), destUidValue));

 		config.stateEnum().set(tr, EBackupState::STATE_ABORTED);

View File

@@ -145,7 +145,7 @@ ClientKnobs::ClientKnobs(bool randomize) {
 	init( BACKUP_ERROR_DELAY, 10.0 );
 	init( BACKUP_STATUS_DELAY, 40.0 );
 	init( BACKUP_STATUS_JITTER, 0.05 );
-	init( CLEAR_LOG_RANGE_COUNT, 1500); // transaction size / (size of '\xff\x02/blog/' + size of UID + size of hash result) = 200,000 / (8 + 16 + 8)
+	init( MIN_CLEANUP_SECONDS, 3600.0 );

 	// Configuration
 	init( DEFAULT_AUTO_PROXIES, 3 );

View File

@@ -131,7 +131,6 @@ public:
 	int BACKUP_COPY_TASKS;
 	int BACKUP_BLOCK_SIZE;
 	int BACKUP_TASKS_PER_AGENT;
-	int CLEAR_LOG_RANGE_COUNT;
 	int SIM_BACKUP_TASKS_PER_AGENT;
 	int BACKUP_RANGEFILE_BLOCK_SIZE;
 	int BACKUP_LOGFILE_BLOCK_SIZE;
@@ -147,6 +146,7 @@ public:
 	double BACKUP_ERROR_DELAY;
 	double BACKUP_STATUS_DELAY;
 	double BACKUP_STATUS_JITTER;
+	double MIN_CLEANUP_SECONDS;

 	// Configuration
 	int32_t DEFAULT_AUTO_PROXIES;

View File

@@ -67,7 +67,7 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
 		std::string key = mode.substr(0, pos);
 		std::string value = mode.substr(pos+1);

-		if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "log_routers" || key == "satellite_logs" || key == "usable_regions" || key == "repopulate_anti_quorum") && isInteger(value) ) {
+		if( (key == "logs" || key == "proxies" || key == "resolvers" || key == "remote_logs" || key == "log_routers" || key == "usable_regions" || key == "repopulate_anti_quorum") && isInteger(value) ) {
 			out[p+key] = value;
 		}
@@ -916,6 +916,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
 		try {
 			tr.setOption( FDBTransactionOptions::LOCK_AWARE );
 			tr.setOption( FDBTransactionOptions::USE_PROVISIONAL_PROXIES );
+			tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE );

 			Optional<Value> currentKey = wait( tr.get( coordinatorsKey ) );
 			if (!currentKey.present())
@@ -1208,8 +1209,6 @@ ACTOR Future<Void> excludeServers( Database cx, vector<AddressExclusion> servers
 			tr.setOption( FDBTransactionOptions::USE_PROVISIONAL_PROXIES );

 			tr.addReadConflictRange( singleKeyRange(excludedServersVersionKey) ); //To conflict with parallel includeServers
-			tr.addReadConflictRange( singleKeyRange(moveKeysLockOwnerKey) );
-			tr.set( moveKeysLockOwnerKey, versionKey );
 			tr.set( excludedServersVersionKey, excludeVersionKey );
 			for(auto& s : servers)
 				tr.set( encodeExcludedServersKey(s), StringRef() );
@@ -1240,9 +1239,6 @@ ACTOR Future<Void> includeServers( Database cx, vector<AddressExclusion> servers
 			// includeServers might be used in an emergency transaction, so make sure it is retry-self-conflicting and CAUSAL_WRITE_RISKY
 			tr.setOption( FDBTransactionOptions::CAUSAL_WRITE_RISKY );
 			tr.addReadConflictRange( singleKeyRange(excludedServersVersionKey) );
-			tr.addReadConflictRange( singleKeyRange(moveKeysLockOwnerKey) );
-			tr.set( moveKeysLockOwnerKey, versionKey );
 			tr.set( excludedServersVersionKey, excludeVersionKey );

 			for(auto& s : servers ) {

View File

@@ -382,7 +382,9 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
 					"layer_status_incomplete",
 					"database_availability_timeout",
 					"consistencycheck_suspendkey_fetch_timeout",
-					"consistencycheck_disabled"
+					"consistencycheck_disabled",
+					"duplicate_mutation_streams",
+					"duplicate_mutation_fetch_timeout"
 				]
 			},
 			"issues":[
@@ -522,7 +524,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
 				"datacenters":[{
 					"id":"mr",
 					"priority":1,
-					"satellite":1
+					"satellite":1,
+					"satellite_logs":2
 				}],
 				"satellite_redundancy_mode":{
 					"$enum":[
@@ -603,6 +606,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
 			"max_machine_failures_without_losing_availability":0,
 			"total_disk_used_bytes":0,
 			"total_kv_size_bytes":0,
+			"system_kv_size_bytes":0,
 			"partitions_count":2,
 			"moving_data":{
 				"total_written_bytes":0,
@@ -731,7 +735,8 @@ const KeyRef JSONSchemas::clusterConfigurationSchema = LiteralStringRef(R"config
 		"datacenters":[{
 			"id":"mr",
 			"priority":1,
-			"satellite":1
+			"satellite":1,
+			"satellite_logs":2
 		}],
 		"satellite_redundancy_mode":{
 			"$enum":[

View File

@@ -179,7 +179,8 @@ public:
 		countConnClosedWithoutError.init(LiteralStringRef("Net2.CountConnClosedWithoutError"));
 	}

-	Reference<struct Peer> getPeer( NetworkAddress const& address, bool openConnection = true );
+	Reference<struct Peer> getPeer( NetworkAddress const& address );
+	Reference<struct Peer> getOrOpenPeer( NetworkAddress const& address, bool startConnectionKeeper=true );

 	// Returns true if given network address 'address' is one of the address we are listening on.
 	bool isLocalAddress(const NetworkAddress& address) const;
@@ -410,7 +411,6 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
 		try {
 			if (!conn) { // Always, except for the first loop with an incoming connection
 				self->outgoingConnectionIdle = true;
-
 				// Wait until there is something to send.
 				while (self->unsent.empty()) {
 					if (FlowTransport::transport().isClient() && self->destination.isPublic() &&
@@ -654,7 +654,7 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
 	if (self->isLocalAddress(destination.getPrimaryAddress())) {
 		sendLocal(self, SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)), Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND));
 	} else {
-		Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress());
+		Reference<Peer> peer = self->getOrOpenPeer(destination.getPrimaryAddress());
 		sendPacket(self, peer, SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)), Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND), false);
 	}
 }
@@ -908,7 +908,7 @@ ACTOR static Future<Void> connectionReader(
 					peerAddress = NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort, true,
 					                             peerAddress.isTLS());
 				}
-				peer = transport->getPeer(peerAddress);
+				peer = transport->getOrOpenPeer(peerAddress, false);
 				peer->compatible = compatible;
 				peer->incompatibleProtocolVersionNewer = incompatibleProtocolVersionNewer;
 				if (!compatible) {
@@ -987,18 +987,25 @@ ACTOR static Future<Void> listen( TransportData* self, NetworkAddress listenAddr
 	}
 }

-Reference<Peer> TransportData::getPeer( NetworkAddress const& address, bool openConnection ) {
+Reference<Peer> TransportData::getPeer( NetworkAddress const& address ) {
 	auto peer = peers.find(address);
 	if (peer != peers.end()) {
 		return peer->second;
 	}
-	if(!openConnection) {
-		return Reference<Peer>();
+	return Reference<Peer>();
+}
+
+Reference<Peer> TransportData::getOrOpenPeer( NetworkAddress const& address, bool startConnectionKeeper ) {
+	auto peer = getPeer(address);
+	if(!peer) {
+		peer = Reference<Peer>( new Peer(this, address) );
+		if(startConnectionKeeper) {
+			peer->connect = connectionKeeper(peer);
+		}
+		peers[address] = peer;
 	}
-	Reference<Peer> newPeer = Reference<Peer>( new Peer(this, address) );
-	newPeer->connect = connectionKeeper(newPeer);
-	peers[address] = newPeer;
-	return newPeer;
+
+	return peer;
 }

 bool TransportData::isLocalAddress(const NetworkAddress& address) const {
@@ -1077,7 +1084,7 @@ void FlowTransport::addPeerReference(const Endpoint& endpoint, bool isStream) {
 	else if (FlowTransport::transport().isClient())
 		IFailureMonitor::failureMonitor().setStatus(endpoint.getPrimaryAddress(), FailureStatus(false));

-	Reference<Peer> peer = self->getPeer(endpoint.getPrimaryAddress());
+	Reference<Peer> peer = self->getOrOpenPeer(endpoint.getPrimaryAddress());
 	if(peer->peerReferences == -1) {
 		peer->peerReferences = 1;
 	} else {
@@ -1087,7 +1094,7 @@ void FlowTransport::addPeerReference(const Endpoint& endpoint, bool isStream) {
 void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream) {
 	if (!isStream || !endpoint.getPrimaryAddress().isValid()) return;
-	Reference<Peer> peer = self->getPeer(endpoint.getPrimaryAddress(), false);
+	Reference<Peer> peer = self->getPeer(endpoint.getPrimaryAddress());
 	if(peer) {
 		peer->peerReferences--;
 		if(peer->peerReferences < 0) {
@@ -1246,7 +1253,7 @@ ReliablePacket* FlowTransport::sendReliable( ISerializeSource const& what, const
 		sendLocal( self, what, destination );
 		return nullptr;
 	}
-	Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress());
+	Reference<Peer> peer = self->getOrOpenPeer(destination.getPrimaryAddress());
 	return sendPacket( self, peer, what, destination, true );
 }
@@ -1260,7 +1267,14 @@ Reference<Peer> FlowTransport::sendUnreliable( ISerializeSource const& what, con
 		sendLocal( self, what, destination );
 		return Reference<Peer>();
 	}
-	Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress(), openConnection);
+	Reference<Peer> peer;
+	if(openConnection) {
+		peer = self->getOrOpenPeer(destination.getPrimaryAddress());
+	}
+	else {
+		peer = self->getPeer(destination.getPrimaryAddress());
+	}
 	sendPacket( self, peer, what, destination, false );
 	return peer;
 }
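
The refactor above splits one dual-purpose accessor into a pure lookup (getPeer) and an explicit get-or-create (getOrOpenPeer), so each call site now states whether it is willing to open a connection as a side effect; removePeerReference takes the lookup-only form and tolerates a missing peer, while the send paths that must reach the destination use the creating form. A compact standalone sketch of the same split, with std::shared_ptr and std::map standing in for the FlowTransport types (all names hypothetical):

    #include <map>
    #include <memory>
    #include <string>

    struct Peer { std::string address; bool keeperStarted = false; };

    class PeerTable {
        std::map<std::string, std::shared_ptr<Peer>> peers;
    public:
        // Lookup only: never creates, never connects.
        std::shared_ptr<Peer> getPeer(const std::string& address) {
            auto it = peers.find(address);
            return it == peers.end() ? nullptr : it->second;
        }
        // Get-or-create: may start the connection keeper as a side effect.
        std::shared_ptr<Peer> getOrOpenPeer(const std::string& address,
                                            bool startConnectionKeeper = true) {
            auto peer = getPeer(address);
            if (!peer) {
                peer = std::make_shared<Peer>(Peer{address});
                peer->keeperStarted = startConnectionKeeper; // stands in for connectionKeeper(peer)
                peers[address] = peer;
            }
            return peer;
        }
    };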

View File

@@ -125,7 +125,7 @@ struct Peer : public ReferenceCounted<Peer> {
 	int outstandingReplies;

 	explicit Peer(TransportData* transport, NetworkAddress const& destination)
-	  : transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0),
+	  : transport(transport), destination(destination), outgoingConnectionIdle(true), lastConnectTime(0.0),
 	    reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true), outstandingReplies(0),
 	    incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()) {}

View File

@@ -398,8 +398,14 @@ public:
 		try {
 			bool remoteDCUsedAsSatellite = false;
 			std::set<Optional<Key>> satelliteDCs;
+			int32_t desiredSatelliteTLogs = 0;
 			for(int s = startDC; s < std::min<int>(startDC + (satelliteFallback ? region.satelliteTLogUsableDcsFallback : region.satelliteTLogUsableDcs), region.satellites.size()); s++) {
 				satelliteDCs.insert(region.satellites[s].dcId);
+				if(region.satellites[s].satelliteDesiredTLogCount == -1 || desiredSatelliteTLogs == -1) {
+					desiredSatelliteTLogs = -1;
+				} else {
+					desiredSatelliteTLogs += region.satellites[s].satelliteDesiredTLogCount;
+				}
 				if (region.satellites[s].dcId == remoteRegion.dcId) {
 					remoteDCUsedAsSatellite = true;
 				}
@@ -413,9 +419,9 @@ public:
 				std::transform(remoteLogs.begin(), remoteLogs.end(), std::back_inserter(exclusionWorkerIds), [](const WorkerDetails &in) { return in.interf.id(); });
 			}
 			if(satelliteFallback) {
-				return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactorFallback, conf.getDesiredSatelliteLogs(region.dcId)*region.satelliteTLogUsableDcsFallback/region.satelliteTLogUsableDcs, region.satelliteTLogPolicyFallback, id_used, checkStable, satelliteDCs, exclusionWorkerIds);
+				return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactorFallback, desiredSatelliteTLogs>0 ? desiredSatelliteTLogs : conf.getDesiredSatelliteLogs(region.dcId)*region.satelliteTLogUsableDcsFallback/region.satelliteTLogUsableDcs, region.satelliteTLogPolicyFallback, id_used, checkStable, satelliteDCs, exclusionWorkerIds);
 			} else {
-				return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactor, conf.getDesiredSatelliteLogs(region.dcId), region.satelliteTLogPolicy, id_used, checkStable, satelliteDCs, exclusionWorkerIds);
+				return getWorkersForTlogs( conf, region.satelliteTLogReplicationFactor, desiredSatelliteTLogs>0 ? desiredSatelliteTLogs : conf.getDesiredSatelliteLogs(region.dcId), region.satelliteTLogPolicy, id_used, checkStable, satelliteDCs, exclusionWorkerIds);
 			}
 		} catch (Error &e) {
 			if(e.code() != error_code_no_more_servers) {
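
The aggregation above treats -1 as an "unspecified" sentinel: the per-satellite desired log counts are summed across the usable satellite DCs, but any unset entry collapses the total to -1, in which case the recruiter falls back to the region-wide conf.getDesiredSatelliteLogs() default. A small standalone sketch of just that fold (the function name is hypothetical):

    #include <vector>

    // Sum per-DC desired counts; any -1 (unset) poisons the total to -1.
    static int sumDesiredOrUnset(const std::vector<int>& perDcDesired) {
        int total = 0;
        for (int d : perDcDesired) {
            if (d == -1 || total == -1)
                total = -1;
            else
                total += d;
        }
        return total;
    }
    // total > 0 -> use the per-satellite override; otherwise the region default.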
@@ -462,7 +468,7 @@ public:
 				deterministicRandom()->randomShuffle(w);
 				for( int i=0; i < w.size(); i++ ) {
 					id_used[w[i].interf.locality.processId()]++;
-					return WorkerFitnessInfo(w[i], it.first.first, it.first.second);
+					return WorkerFitnessInfo(w[i], std::max(ProcessClass::GoodFit, it.first.first), it.first.second);
 				}
 			}
 		}
@@ -518,18 +524,8 @@ public:
 		RoleFitness() : bestFit(ProcessClass::NeverAssign), worstFit(ProcessClass::NeverAssign), role(ProcessClass::NoRole), count(0), worstIsDegraded(false) {}

-		RoleFitness(RoleFitness first, RoleFitness second, ProcessClass::ClusterRole role) : bestFit(std::min(first.worstFit, second.worstFit)), worstFit(std::max(first.worstFit, second.worstFit)), count(first.count + second.count), role(role) {
-			if(first.worstFit > second.worstFit) {
-				worstIsDegraded = first.worstIsDegraded;
-			} else if(second.worstFit > first.worstFit) {
-				worstIsDegraded = second.worstIsDegraded;
-			} else {
-				worstIsDegraded = first.worstIsDegraded || second.worstIsDegraded;
-			}
-		}
-
 		RoleFitness( vector<WorkerDetails> workers, ProcessClass::ClusterRole role ) : role(role) {
-			worstFit = ProcessClass::BestFit;
+			worstFit = ProcessClass::GoodFit;
 			worstIsDegraded = false;
 			bestFit = ProcessClass::NeverAssign;
 			for(auto& it : workers) {
@@ -576,6 +572,35 @@ public:
 		std::string toString() const { return format("%d %d %d %d", bestFit, worstFit, count, worstIsDegraded); }
 	};

+	struct RoleFitnessPair {
+		RoleFitness proxy;
+		RoleFitness resolver;
+
+		RoleFitnessPair() {}
+		RoleFitnessPair(RoleFitness const& proxy, RoleFitness const& resolver) : proxy(proxy), resolver(resolver) {}
+
+		bool operator < (RoleFitnessPair const& r) const {
+			if(proxy.betterFitness(r.proxy)) {
+				return true;
+			}
+			if(r.proxy.betterFitness(proxy)) {
+				return false;
+			}
+			if(resolver.betterFitness(r.resolver)) {
+				return true;
+			}
+			if(r.resolver.betterFitness(resolver)) {
+				return false;
+			}
+			if(proxy.count != r.proxy.count) {
+				return proxy.count > r.proxy.count;
+			}
+			return resolver.count > r.resolver.count;
+		}
+
+		bool operator == (RoleFitnessPair const& r) const { return proxy == r.proxy && resolver == r.resolver; }
+	};
+
 	std::set<Optional<Standalone<StringRef>>> getDatacenters( DatabaseConfiguration const& conf, bool checkStable = false ) {
 		std::set<Optional<Standalone<StringRef>>> result;
 		for( auto& it : id_worker )
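
RoleFitnessPair orders candidate recruitments lexicographically: proxy fitness first, then resolver fitness, and only when both tie does it fall back to counts, where a larger count compares as better because more of the desired roles were actually filled. A standalone sketch of the same ordering on plain structs (hypothetical types; a lower quality value means a better fit):

    struct Fit { int quality; int count; }; // lower quality = better fit

    // Returns true when (proxyA, resA) is strictly better than (proxyB, resB).
    static bool betterPair(Fit proxyA, Fit resA, Fit proxyB, Fit resB) {
        if (proxyA.quality != proxyB.quality) return proxyA.quality < proxyB.quality;
        if (resA.quality != resB.quality) return resA.quality < resB.quality;
        if (proxyA.count != proxyB.count) return proxyA.count > proxyB.count;
        return resA.count > resB.count;
    }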
@@ -776,7 +801,7 @@ public:
 		auto datacenters = getDatacenters( req.configuration );

-		RoleFitness bestFitness;
+		RoleFitnessPair bestFitness;
 		int numEquivalent = 1;
 		Optional<Key> bestDC;
@@ -793,7 +818,7 @@ public:
 				proxies.push_back(first_proxy.worker);
 				resolvers.push_back(first_resolver.worker);

-				auto fitness = RoleFitness( RoleFitness(proxies, ProcessClass::Proxy), RoleFitness(resolvers, ProcessClass::Resolver), ProcessClass::NoRole );
+				RoleFitnessPair fitness( RoleFitness(proxies, ProcessClass::Proxy), RoleFitness(resolvers, ProcessClass::Resolver) );

 				if(dcId == clusterControllerDcId) {
 					bestFitness = fitness;
@@ -839,7 +864,8 @@ public:
 		if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
 			( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
-			  RoleFitness(std::min(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), std::max(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS), req.configuration.getDesiredProxies()+req.configuration.getDesiredResolvers(), ProcessClass::NoRole).betterCount(bestFitness) ) ) {
+			  RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(bestFitness.proxy) ||
+			  RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver).betterCount(bestFitness.resolver) ) ) {
 			throw operation_failed();
 		}
@@ -985,10 +1011,14 @@ public:
 		std::map< Optional<Standalone<StringRef>>, int> id_used;
 		id_used[clusterControllerProcessId]++;
 		WorkerFitnessInfo mworker = getWorkerForRoleInDatacenter(clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db.config, id_used, true);
+		auto newMasterFit = mworker.worker.processClass.machineClassFitness( ProcessClass::Master );
+		if(db.config.isExcludedServer(mworker.worker.interf.address())) {
+			newMasterFit = std::max(newMasterFit, ProcessClass::ExcludeFit);
+		}

-		if ( oldMasterFit < mworker.fitness )
+		if ( oldMasterFit < newMasterFit )
 			return false;
-		if ( oldMasterFit > mworker.fitness || ( dbi.master.locality.processId() == clusterControllerProcessId && mworker.worker.interf.locality.processId() != clusterControllerProcessId ) )
+		if ( oldMasterFit > newMasterFit || ( dbi.master.locality.processId() == clusterControllerProcessId && mworker.worker.interf.locality.processId() != clusterControllerProcessId ) )
 			return true;

 		std::set<Optional<Key>> primaryDC;
@@ -1018,6 +1048,7 @@ public:
 		if(oldTLogFit < newTLogFit) return false;

 		bool oldSatelliteFallback = false;
+
 		for(auto& logSet : dbi.logSystemConfig.tLogs) {
 			if(logSet.isLocal && logSet.locality == tagLocalitySatellite) {
 				oldSatelliteFallback = logSet.tLogPolicy->info() != region.satelliteTLogPolicy->info();
@@ -1031,11 +1062,42 @@ public:
 		auto newSatelliteTLogs = region.satelliteTLogReplicationFactor > 0 ? getWorkersForSatelliteLogs(db.config, region, remoteRegion, id_used, newSatelliteFallback, true) : satellite_tlogs;
 		RoleFitness newSatelliteTLogFit(newSatelliteTLogs, ProcessClass::TLog);

-		if(oldSatelliteTLogFit < newSatelliteTLogFit)
-			return false;
+		std::map<Optional<Key>,int32_t> satellite_priority;
+		for(auto& r : region.satellites) {
+			satellite_priority[r.dcId] = r.priority;
+		}
+
+		int32_t oldSatelliteRegionFit = std::numeric_limits<int32_t>::max();
+		for(auto& it : satellite_tlogs) {
+			if(satellite_priority.count(it.interf.locality.dcId())) {
+				oldSatelliteRegionFit = std::min(oldSatelliteRegionFit, satellite_priority[it.interf.locality.dcId()]);
+			} else {
+				oldSatelliteRegionFit = -1;
+			}
+		}
+
+		int32_t newSatelliteRegionFit = std::numeric_limits<int32_t>::max();
+		for(auto& it : newSatelliteTLogs) {
+			if(satellite_priority.count(it.interf.locality.dcId())) {
+				newSatelliteRegionFit = std::min(newSatelliteRegionFit, satellite_priority[it.interf.locality.dcId()]);
+			} else {
+				newSatelliteRegionFit = -1;
+			}
+		}
+
+		if(oldSatelliteFallback && !newSatelliteFallback)
+			return true;
 		if(!oldSatelliteFallback && newSatelliteFallback)
 			return false;

+		if(oldSatelliteRegionFit < newSatelliteRegionFit)
+			return true;
+		if(oldSatelliteRegionFit > newSatelliteRegionFit)
+			return false;
+
+		if(oldSatelliteTLogFit < newSatelliteTLogFit)
+			return false;
+
 		RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog);
 		std::vector<UID> exclusionWorkerIds;
 		auto fn = [](const WorkerDetails &in) { return in.interf.id(); };
@@ -1059,7 +1121,7 @@ public:
 			}
 		}
 		if(oldLogRoutersFit < newLogRoutersFit) return false;

 		// Check proxy/resolver fitness
-		RoleFitness oldInFit(RoleFitness(proxyClasses, ProcessClass::Proxy), RoleFitness(resolverClasses, ProcessClass::Resolver), ProcessClass::NoRole);
+		RoleFitnessPair oldInFit(RoleFitness(proxyClasses, ProcessClass::Proxy), RoleFitness(resolverClasses, ProcessClass::Resolver));
 		auto first_resolver = getWorkerForRoleInDatacenter( clusterControllerDcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true );
 		auto first_proxy = getWorkerForRoleInDatacenter( clusterControllerDcId, ProcessClass::Proxy, ProcessClass::ExcludeFit, db.config, id_used, true );
@@ -1069,12 +1131,15 @@ public:
 		proxies.push_back(first_proxy.worker);
 		resolvers.push_back(first_resolver.worker);

-		RoleFitness newInFit(RoleFitness(proxies, ProcessClass::Proxy), RoleFitness(resolvers, ProcessClass::Resolver), ProcessClass::NoRole);
-		if(oldInFit.betterFitness(newInFit)) return false;
+		RoleFitnessPair newInFit(RoleFitness(proxies, ProcessClass::Proxy), RoleFitness(resolvers, ProcessClass::Resolver));
+		if(oldInFit.proxy.betterFitness(newInFit.proxy) || oldInFit.resolver.betterFitness(newInFit.resolver)) {
+			return false;
+		}

-		if(oldTLogFit > newTLogFit || oldInFit > newInFit || (oldSatelliteFallback && !newSatelliteFallback) || oldSatelliteTLogFit > newSatelliteTLogFit || oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit) {
-			TraceEvent("BetterMasterExists", id).detail("OldMasterFit", oldMasterFit).detail("NewMasterFit", mworker.fitness)
+		if(oldTLogFit > newTLogFit || oldInFit > newInFit || oldSatelliteTLogFit > newSatelliteTLogFit || oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit) {
+			TraceEvent("BetterMasterExists", id).detail("OldMasterFit", oldMasterFit).detail("NewMasterFit", newMasterFit)
 				.detail("OldTLogFit", oldTLogFit.toString()).detail("NewTLogFit", newTLogFit.toString())
-				.detail("OldInFit", oldInFit.toString()).detail("NewInFit", newInFit.toString())
+				.detail("OldProxyFit", oldInFit.proxy.toString()).detail("NewProxyFit", newInFit.proxy.toString())
+				.detail("OldResolverFit", oldInFit.resolver.toString()).detail("NewResolverFit", newInFit.resolver.toString())
 				.detail("OldSatelliteFit", oldSatelliteTLogFit.toString()).detail("NewSatelliteFit", newSatelliteTLogFit.toString())
 				.detail("OldRemoteFit", oldRemoteTLogFit.toString()).detail("NewRemoteFit", newRemoteTLogFit.toString())
 				.detail("OldRouterFit", oldLogRoutersFit.toString()).detail("NewRouterFit", newLogRoutersFit.toString())
@@ -1161,11 +1226,36 @@ public:
 	Optional<UID> recruitingRatekeeperID;
 	AsyncVar<bool> recruitRatekeeper;

+	CounterCollection clusterControllerMetrics;
+
+	Counter openDatabaseRequests;
+	Counter registerWorkerRequests;
+	Counter getWorkersRequests;
+	Counter getClientWorkersRequests;
+	Counter registerMasterRequests;
+	Counter getServerDBInfoRequests;
+	Counter statusRequests;
+	Counter failureMonitoringRequests;
+
+	Counter serversFailed;
+	Counter serversUnfailed;
+
 	ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality )
 		: clusterControllerProcessId(locality.processId()), clusterControllerDcId(locality.dcId()),
 		  id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false),
 		  gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0),
-		  versionDifferenceUpdated(false), recruitingDistributor(false), recruitRatekeeper(false)
+		  versionDifferenceUpdated(false), recruitingDistributor(false), recruitRatekeeper(false),
+		  clusterControllerMetrics("ClusterController", id.toString()),
+		  openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
+		  registerWorkerRequests("RegisterWorkerRequests", clusterControllerMetrics),
+		  getWorkersRequests("GetWorkersRequests", clusterControllerMetrics),
+		  getClientWorkersRequests("GetClientWorkersRequests", clusterControllerMetrics),
+		  registerMasterRequests("RegisterMasterRequests", clusterControllerMetrics),
+		  getServerDBInfoRequests("GetServerDBInfoRequests", clusterControllerMetrics),
+		  statusRequests("StatusRequests", clusterControllerMetrics),
+		  failureMonitoringRequests("FailureMonitoringRequests", clusterControllerMetrics),
+		  serversFailed("ServersFailed", clusterControllerMetrics),
+		  serversUnfailed("ServersUnfailed", clusterControllerMetrics)
 	{
 		CachedSerialization<ServerDBInfo> newInfoCache = db.serverInfo->get();
 		auto& serverInfo = newInfoCache.mutate();
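
The new counters follow the usual pattern in this codebase: each Counter registers with the owning CounterCollection at construction, and call sites simply increment, as the ++self->statusRequests and ++self->serversFailed lines later in this diff do. A toy standalone equivalent, with plain C++ types standing in for Counter and CounterCollection (illustrative only, not the real flow/Stats API):

    #include <cstdint>
    #include <map>
    #include <string>

    struct CounterSet {
        std::map<std::string, uint64_t> values;
        uint64_t& counter(const std::string& name) { return values[name]; } // registers on first use
    };

    struct ControllerStats {
        CounterSet metrics;
        uint64_t& statusRequests = metrics.counter("StatusRequests");
        uint64_t& serversFailed = metrics.counter("ServersFailed");
    };

    // Usage: ++stats.statusRequests; mirrors ++self->statusRequests in the diff.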
@@ -1518,7 +1608,7 @@ struct FailureStatusInfo {
 };

 //The failure monitor client relies on the fact that the failure detection server will not declare itself failed
-ACTOR Future<Void> failureDetectionServer( UID uniqueID, ClusterControllerData::DBInfo* db, FutureStream< FailureMonitoringRequest > requests ) {
+ACTOR Future<Void> failureDetectionServer( UID uniqueID, ClusterControllerData* self, FutureStream< FailureMonitoringRequest > requests ) {
 	state Version currentVersion = 0;
 	state std::map<NetworkAddressList, FailureStatusInfo> currentStatus;	// The status at currentVersion
 	state std::deque<SystemFailureStatus> statusHistory;	// The last change in statusHistory is from currentVersion-1 to currentVersion
@@ -1527,6 +1617,7 @@ ACTOR Future<Void> failureDetectionServer( UID uniqueID, ClusterControllerData::
 	loop choose {
 		when ( FailureMonitoringRequest req = waitNext( requests ) ) {
+			++self->failureMonitoringRequests;
 			if ( req.senderStatus.present() ) {
 				// Update the status of requester, if necessary
 				auto& stat = currentStatus[ req.addresses ];
@@ -1536,6 +1627,12 @@ ACTOR Future<Void> failureDetectionServer( UID uniqueID, ClusterControllerData::
 				stat.insertRequest(now());
 				if (req.senderStatus != stat.status) {
+					if(newStat.failed) {
+						++self->serversFailed;
+					}
+					else {
+						++self->serversUnfailed;
+					}
 					TraceEvent("FailureDetectionStatus", uniqueID).detail("System", req.addresses.toString()).detail("Status", newStat.failed ? "Failed" : "OK").detail("Why", "Request");
 					statusHistory.push_back( SystemFailureStatus( req.addresses, newStat ) );
 					++currentVersion;
@ -1615,7 +1712,7 @@ ACTOR Future<Void> failureDetectionServer( UID uniqueID, ClusterControllerData::
//TraceEvent("FailureDetectionPoll", uniqueID).detail("PivotDelay", pivotDelay).detail("Clients", currentStatus.size()); //TraceEvent("FailureDetectionPoll", uniqueID).detail("PivotDelay", pivotDelay).detail("Clients", currentStatus.size());
//TraceEvent("FailureDetectionAcceptableDelay").detail("Delay", acceptableDelay1000); //TraceEvent("FailureDetectionAcceptableDelay").detail("Delay", acceptableDelay1000);
bool tooManyLogGenerations = std::max(db->unfinishedRecoveries, db->logGenerations) > CLIENT_KNOBS->FAILURE_MAX_GENERATIONS; bool tooManyLogGenerations = std::max(self->db.unfinishedRecoveries, self->db.logGenerations) > CLIENT_KNOBS->FAILURE_MAX_GENERATIONS;
for(auto it = currentStatus.begin(); it != currentStatus.end(); ) { for(auto it = currentStatus.begin(); it != currentStatus.end(); ) {
double delay = t - it->second.lastRequestTime; double delay = t - it->second.lastRequestTime;
@ -1624,7 +1721,8 @@ ACTOR Future<Void> failureDetectionServer( UID uniqueID, ClusterControllerData::
( delay > pivotDelay * 2 + FLOW_KNOBS->SERVER_REQUEST_INTERVAL + CLIENT_KNOBS->FAILURE_MIN_DELAY || delay > CLIENT_KNOBS->FAILURE_MAX_DELAY ) ) ) { ( delay > pivotDelay * 2 + FLOW_KNOBS->SERVER_REQUEST_INTERVAL + CLIENT_KNOBS->FAILURE_MIN_DELAY || delay > CLIENT_KNOBS->FAILURE_MAX_DELAY ) ) ) {
//printf("Failure Detection Server: Status of '%s' is now '%s' after %f sec\n", it->first.toString().c_str(), "Failed", now() - it->second.lastRequestTime); //printf("Failure Detection Server: Status of '%s' is now '%s' after %f sec\n", it->first.toString().c_str(), "Failed", now() - it->second.lastRequestTime);
TraceEvent("FailureDetectionStatus", uniqueID).detail("System", describe(it->first)).detail("Status","Failed").detail("Why", "Timeout").detail("LastRequestAge", delay) TraceEvent("FailureDetectionStatus", uniqueID).detail("System", describe(it->first)).detail("Status","Failed").detail("Why", "Timeout").detail("LastRequestAge", delay)
.detail("PivotDelay", pivotDelay).detail("UnfinishedRecoveries", db->unfinishedRecoveries).detail("LogGenerations", db->logGenerations); .detail("PivotDelay", pivotDelay).detail("UnfinishedRecoveries", self->db.unfinishedRecoveries).detail("LogGenerations", self->db.logGenerations);
++self->serversFailed;
statusHistory.push_back( SystemFailureStatus( it->first, FailureStatus(true) ) ); statusHistory.push_back( SystemFailureStatus( it->first, FailureStatus(true) ) );
++currentVersion; ++currentVersion;
it = currentStatus.erase(it); it = currentStatus.erase(it);
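The timeout branch above is the heart of the failure detector: a client is declared failed once the gap since its last heartbeat exceeds a pivot-based threshold. A minimal self-contained sketch of that predicate (the parameter names stand in for FLOW_KNOBS->SERVER_REQUEST_INTERVAL, CLIENT_KNOBS->FAILURE_MIN_DELAY, and CLIENT_KNOBS->FAILURE_MAX_DELAY; the pivot-delay computation itself is outside this hunk):

    // Simplified timeout test, mirroring the condition in failureDetectionServer.
    // pivotDelay is a recent percentile of observed heartbeat gaps across clients.
    bool shouldDeclareFailed(double now, double lastRequestTime, double pivotDelay,
                             double serverRequestInterval, double failureMinDelay,
                             double failureMaxDelay) {
        double delay = now - lastRequestTime;
        // Failed if this client is far slower than its peers (twice the pivot
        // plus fixed slack), or has been silent longer than the absolute maximum.
        return delay > pivotDelay * 2 + serverRequestInterval + failureMinDelay ||
               delay > failureMaxDelay;
    }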
@ -2005,6 +2103,7 @@ ACTOR Future<Void> statusServer(FutureStream< StatusRequest> requests,
try { try {
// Wait til first request is ready // Wait til first request is ready
StatusRequest req = waitNext(requests); StatusRequest req = waitNext(requests);
++self->statusRequests;
requests_batch.push_back(req); requests_batch.push_back(req);
// Earliest time at which we may begin a new request // Earliest time at which we may begin a new request
@ -2584,7 +2683,7 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
state uint64_t step = 0; state uint64_t step = 0;
state Future<ErrorOr<Void>> error = errorOr( actorCollection( self.addActor.getFuture() ) ); state Future<ErrorOr<Void>> error = errorOr( actorCollection( self.addActor.getFuture() ) );
self.addActor.send( failureDetectionServer( self.id, &self.db, interf.clientInterface.failureMonitoring.getFuture() ) ); self.addActor.send( failureDetectionServer( self.id, &self, interf.clientInterface.failureMonitoring.getFuture() ) );
self.addActor.send( clusterWatchDatabase( &self, &self.db ) ); // Start the master database self.addActor.send( clusterWatchDatabase( &self, &self.db ) ); // Start the master database
self.addActor.send( self.updateWorkerList.init( self.db.db ) ); self.addActor.send( self.updateWorkerList.init( self.db.db ) );
self.addActor.send( statusServer( interf.clientInterface.databaseStatus.getFuture(), &self, coordinators)); self.addActor.send( statusServer( interf.clientInterface.databaseStatus.getFuture(), &self, coordinators));
@ -2598,6 +2697,8 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
self.addActor.send( handleForcedRecoveries(&self, interf) ); self.addActor.send( handleForcedRecoveries(&self, interf) );
self.addActor.send( monitorDataDistributor(&self) ); self.addActor.send( monitorDataDistributor(&self) );
self.addActor.send( monitorRatekeeper(&self) ); self.addActor.send( monitorRatekeeper(&self) );
self.addActor.send( traceCounters("ClusterControllerMetrics", self.id, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &self.clusterControllerMetrics, self.id.toString() + "/ClusterControllerMetrics") );
//printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str()); //printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str());
loop choose { loop choose {
@ -2613,6 +2714,7 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
return Void(); return Void();
} }
when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) { when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) {
++self.openDatabaseRequests;
self.addActor.send(clusterOpenDatabase(&self.db, req)); self.addActor.send(clusterOpenDatabase(&self.db, req));
} }
when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) { when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) {
@ -2625,9 +2727,11 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
clusterRecruitStorage( &self, req ); clusterRecruitStorage( &self, req );
} }
when( RegisterWorkerRequest req = waitNext( interf.registerWorker.getFuture() ) ) { when( RegisterWorkerRequest req = waitNext( interf.registerWorker.getFuture() ) ) {
++self.registerWorkerRequests;
registerWorker( req, &self ); registerWorker( req, &self );
} }
when( GetWorkersRequest req = waitNext( interf.getWorkers.getFuture() ) ) { when( GetWorkersRequest req = waitNext( interf.getWorkers.getFuture() ) ) {
++self.getWorkersRequests;
vector<WorkerDetails> workers; vector<WorkerDetails> workers;
for(auto& it : self.id_worker) { for(auto& it : self.id_worker) {
@ -2645,6 +2749,7 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
req.reply.send( workers ); req.reply.send( workers );
} }
when( GetClientWorkersRequest req = waitNext( interf.clientInterface.getClientWorkers.getFuture() ) ) { when( GetClientWorkersRequest req = waitNext( interf.clientInterface.getClientWorkers.getFuture() ) ) {
++self.getClientWorkersRequests;
vector<ClientWorkerInterface> workers; vector<ClientWorkerInterface> workers;
for(auto& it : self.id_worker) { for(auto& it : self.id_worker) {
if (it.second.details.processClass.classType() != ProcessClass::TesterClass) { if (it.second.details.processClass.classType() != ProcessClass::TesterClass) {
@ -2661,9 +2766,11 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
TraceEvent("CoordinationPingSent", self.id).detail("TimeStep", message.timeStep); TraceEvent("CoordinationPingSent", self.id).detail("TimeStep", message.timeStep);
} }
when( RegisterMasterRequest req = waitNext( interf.registerMaster.getFuture() ) ) { when( RegisterMasterRequest req = waitNext( interf.registerMaster.getFuture() ) ) {
++self.registerMasterRequests;
clusterRegisterMaster( &self, req ); clusterRegisterMaster( &self, req );
} }
when( GetServerDBInfoRequest req = waitNext( interf.getServerDBInfo.getFuture() ) ) { when( GetServerDBInfoRequest req = waitNext( interf.getServerDBInfo.getFuture() ) ) {
++self.getServerDBInfoRequests;
self.addActor.send( self.addActor.send(
clusterGetServerInfo(&self.db, req.knownServerInfoID, req.issues, req.incompatiblePeers, req.reply)); clusterGetServerInfo(&self.db, req.knownServerInfoID, req.issues, req.incompatiblePeers, req.reply));
} }
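The ++self.xxxRequests increments sprinkled through the request loop assume per-request-type counters on ClusterControllerData, flushed periodically by the traceCounters() actor registered above. A self-contained analogue of that pattern (this is not FDB's actual Counter/CounterCollection API, just an illustration of its shape):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // One named counter per request type; a periodic task emits them all as a
    // single metrics event, mirroring traceCounters("ClusterControllerMetrics", ...).
    struct CounterCollection {
        std::map<std::string, uint64_t> counts;
        uint64_t& operator[](const std::string& name) { return counts[name]; }
        void trace(const std::string& eventName) const {
            std::cout << eventName;
            for (const auto& c : counts) std::cout << ' ' << c.first << '=' << c.second;
            std::cout << '\n';
        }
    };

    int main() {
        CounterCollection cc;
        ++cc["OpenDatabaseRequests"];    // mirrors ++self.openDatabaseRequests
        ++cc["RegisterWorkerRequests"];  // mirrors ++self.registerWorkerRequests
        cc.trace("ClusterControllerMetrics");
    }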
@ -189,7 +189,7 @@ public:
int priority; int priority;
explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers) explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers)
: servers(servers), healthy(true), priority(PRIORITY_TEAM_HEALTHY), wrongConfiguration(false) { : servers(servers), healthy(true), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY), wrongConfiguration(false) {
if (servers.empty()) { if (servers.empty()) {
TraceEvent(SevInfo, "ConstructTCTeamFromEmptyServers"); TraceEvent(SevInfo, "ConstructTCTeamFromEmptyServers");
} }
@ -2558,7 +2558,7 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
} }
// To avoid removing machine teams too fast, which is unlikely to happen anyway // To avoid removing machine teams too fast, which is unlikely to happen anyway
wait( delay(SERVER_KNOBS->TR_REMOVE_MACHINE_TEAM_DELAY) ); wait( delay(SERVER_KNOBS->TR_REMOVE_MACHINE_TEAM_DELAY, TaskPriority::DataDistribution) );
wait(waitUntilHealthy(self)); wait(waitUntilHealthy(self));
// Wait for the badTeamRemover() to avoid the potential race between adding the bad team (add the team tracker) // Wait for the badTeamRemover() to avoid the potential race between adding the bad team (add the team tracker)
@ -2681,7 +2681,7 @@ ACTOR Future<Void> serverTeamRemover(DDTeamCollection* self) {
removeServerTeamDelay = removeServerTeamDelay / 100; removeServerTeamDelay = removeServerTeamDelay / 100;
} }
// To avoid removing server teams too fast, which is unlikely to happen anyway // To avoid removing server teams too fast, which is unlikely to happen anyway
wait(delay(removeServerTeamDelay)); wait(delay(removeServerTeamDelay, TaskPriority::DataDistribution));
wait(waitUntilHealthy(self, SERVER_KNOBS->TR_REMOVE_SERVER_TEAM_EXTRA_DELAY)); wait(waitUntilHealthy(self, SERVER_KNOBS->TR_REMOVE_SERVER_TEAM_EXTRA_DELAY));
// Wait for the badTeamRemover() to avoid the potential race between // Wait for the badTeamRemover() to avoid the potential race between
@ -2865,25 +2865,25 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
state int lastPriority = team->getPriority(); state int lastPriority = team->getPriority();
if( serversLeft < self->configuration.storageTeamSize ) { if( serversLeft < self->configuration.storageTeamSize ) {
if( serversLeft == 0 ) if( serversLeft == 0 )
team->setPriority( PRIORITY_TEAM_0_LEFT ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_0_LEFT );
else if( serversLeft == 1 ) else if( serversLeft == 1 )
team->setPriority( PRIORITY_TEAM_1_LEFT ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_1_LEFT );
else if( serversLeft == 2 ) else if( serversLeft == 2 )
team->setPriority( PRIORITY_TEAM_2_LEFT ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_2_LEFT );
else else
team->setPriority( PRIORITY_TEAM_UNHEALTHY ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY );
} }
else if ( badTeam || anyWrongConfiguration ) { else if ( badTeam || anyWrongConfiguration ) {
if ( redundantTeam ) { if ( redundantTeam ) {
team->setPriority( PRIORITY_TEAM_REDUNDANT ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT );
} else { } else {
team->setPriority( PRIORITY_TEAM_UNHEALTHY ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY );
} }
} }
else if( anyUndesired ) else if( anyUndesired )
team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
else else
team->setPriority( PRIORITY_TEAM_HEALTHY ); team->setPriority( SERVER_KNOBS->PRIORITY_TEAM_HEALTHY );
if(lastPriority != team->getPriority()) { if(lastPriority != team->getPriority()) {
self->priority_teams[lastPriority]--; self->priority_teams[lastPriority]--;
@ -2901,13 +2901,13 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
for(int i=0; i<shards.size(); i++) { for(int i=0; i<shards.size(); i++) {
int maxPriority = team->getPriority(); int maxPriority = team->getPriority();
if(maxPriority < PRIORITY_TEAM_0_LEFT) { if(maxPriority < SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
auto teams = self->shardsAffectedByTeamFailure->getTeamsFor( shards[i] ); auto teams = self->shardsAffectedByTeamFailure->getTeamsFor( shards[i] );
for( int j=0; j < teams.first.size()+teams.second.size(); j++) { for( int j=0; j < teams.first.size()+teams.second.size(); j++) {
// t is the team in primary DC or the remote DC // t is the team in primary DC or the remote DC
auto& t = j < teams.first.size() ? teams.first[j] : teams.second[j-teams.first.size()]; auto& t = j < teams.first.size() ? teams.first[j] : teams.second[j-teams.first.size()];
if( !t.servers.size() ) { if( !t.servers.size() ) {
maxPriority = PRIORITY_TEAM_0_LEFT; maxPriority = SERVER_KNOBS->PRIORITY_TEAM_0_LEFT;
break; break;
} }
@ -2931,8 +2931,8 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
// false We want to differentiate the redundant_team from unhealthy_team in // false We want to differentiate the redundant_team from unhealthy_team in
// terms of relocate priority // terms of relocate priority
maxPriority = maxPriority =
std::max<int>(maxPriority, redundantTeam ? PRIORITY_TEAM_REDUNDANT std::max<int>(maxPriority, redundantTeam ? SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT
: PRIORITY_TEAM_UNHEALTHY); : SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY);
} }
} else { } else {
TEST(true); // A removed server is still associated with a team in SABTF TEST(true); // A removed server is still associated with a team in SABTF
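The cascade above maps a team's state to one of the new priority knobs; rewritten as a pure function, it is easier to see that server count dominates, then configuration problems, then undesired servers. A sketch with this commit's knob defaults baked into an illustrative struct (the real values come from SERVER_KNOBS):

    // Illustrative defaults only; SERVER_KNOBS supplies the real values.
    struct Priorities {
        int TEAM_HEALTHY = 140, TEAM_CONTAINS_UNDESIRED_SERVER = 150,
            TEAM_REDUNDANT = 200, TEAM_UNHEALTHY = 700, TEAM_2_LEFT = 709,
            TEAM_1_LEFT = 800, TEAM_0_LEFT = 809;
    };

    int teamPriority(int serversLeft, int storageTeamSize, bool badTeam,
                     bool anyWrongConfiguration, bool redundantTeam,
                     bool anyUndesired, const Priorities& k) {
        if (serversLeft < storageTeamSize) {
            if (serversLeft == 0) return k.TEAM_0_LEFT;
            if (serversLeft == 1) return k.TEAM_1_LEFT;
            if (serversLeft == 2) return k.TEAM_2_LEFT;
            return k.TEAM_UNHEALTHY;
        }
        if (badTeam || anyWrongConfiguration)
            return redundantTeam ? k.TEAM_REDUNDANT : k.TEAM_UNHEALTHY;
        if (anyUndesired) return k.TEAM_CONTAINS_UNDESIRED_SERVER;
        return k.TEAM_HEALTHY;
    }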
@ -3064,7 +3064,7 @@ ACTOR Future<vector<std::pair<StorageServerInterface, ProcessClass>>> getServerL
} }
ACTOR Future<Void> waitServerListChange( DDTeamCollection* self, FutureStream<Void> serverRemoved ) { ACTOR Future<Void> waitServerListChange( DDTeamCollection* self, FutureStream<Void> serverRemoved ) {
state Future<Void> checkSignal = delay(SERVER_KNOBS->SERVER_LIST_DELAY); state Future<Void> checkSignal = delay(SERVER_KNOBS->SERVER_LIST_DELAY, TaskPriority::DataDistributionLaunch);
state Future<vector<std::pair<StorageServerInterface, ProcessClass>>> serverListAndProcessClasses = Never(); state Future<vector<std::pair<StorageServerInterface, ProcessClass>>> serverListAndProcessClasses = Never();
state bool isFetchingResults = false; state bool isFetchingResults = false;
state Transaction tr(self->cx); state Transaction tr(self->cx);
@ -3102,7 +3102,7 @@ ACTOR Future<Void> waitServerListChange( DDTeamCollection* self, FutureStream<Vo
} }
tr = Transaction(self->cx); tr = Transaction(self->cx);
checkSignal = delay(SERVER_KNOBS->SERVER_LIST_DELAY); checkSignal = delay(SERVER_KNOBS->SERVER_LIST_DELAY, TaskPriority::DataDistributionLaunch);
} }
when( waitNext( serverRemoved ) ) { when( waitNext( serverRemoved ) ) {
if( isFetchingResults ) { if( isFetchingResults ) {
@ -3136,7 +3136,7 @@ ACTOR Future<Void> waitHealthyZoneChange( DDTeamCollection* self ) {
healthyZoneTimeout = Never(); healthyZoneTimeout = Never();
} else if (p.second > tr.getReadVersion().get()) { } else if (p.second > tr.getReadVersion().get()) {
double timeoutSeconds = (p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND; double timeoutSeconds = (p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND;
healthyZoneTimeout = delay(timeoutSeconds); healthyZoneTimeout = delay(timeoutSeconds, TaskPriority::DataDistribution);
if(self->healthyZone.get() != p.first) { if(self->healthyZone.get() != p.first) {
TraceEvent("MaintenanceZoneStart", self->distributorId).detail("ZoneID", printable(p.first)).detail("EndVersion", p.second).detail("Duration", timeoutSeconds); TraceEvent("MaintenanceZoneStart", self->distributorId).detail("ZoneID", printable(p.first)).detail("EndVersion", p.second).detail("Duration", timeoutSeconds);
self->healthyZone.set(p.first); self->healthyZone.set(p.first);
@ -3591,7 +3591,7 @@ ACTOR Future<Void> checkAndRemoveInvalidLocalityAddr(DDTeamCollection* self) {
loop { loop {
try { try {
wait(delay(SERVER_KNOBS->DD_CHECK_INVALID_LOCALITY_DELAY)); wait(delay(SERVER_KNOBS->DD_CHECK_INVALID_LOCALITY_DELAY, TaskPriority::DataDistribution));
// Because a worker's processId can change when its locality changes, we cannot watch the old // Because a worker's processId can change when its locality changes, we cannot watch the old
// processId. This actor is inactive most of the time, so iterating over all workers incurs little performance overhead. // processId. This actor is inactive most of the time, so iterating over all workers incurs little performance overhead.
@ -3770,7 +3770,7 @@ ACTOR Future<Void> storageRecruiter( DDTeamCollection* self, Reference<AsyncVar<
} }
when( wait( self->restartRecruiting.onTrigger() ) ) {} when( wait( self->restartRecruiting.onTrigger() ) ) {}
} }
wait( delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY) ); wait( delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY, TaskPriority::DataDistribution) );
} catch( Error &e ) { } catch( Error &e ) {
if(e.code() != error_code_timed_out) { if(e.code() != error_code_timed_out) {
throw; throw;
@ -3830,7 +3830,7 @@ ACTOR Future<Void> remoteRecovered( Reference<AsyncVar<struct ServerDBInfo>> db
ACTOR Future<Void> monitorHealthyTeams( DDTeamCollection* self ) { ACTOR Future<Void> monitorHealthyTeams( DDTeamCollection* self ) {
loop choose { loop choose {
when ( wait(self->zeroHealthyTeams->get() ? delay(SERVER_KNOBS->DD_ZERO_HEALTHY_TEAM_DELAY) : Never()) ) { when ( wait(self->zeroHealthyTeams->get() ? delay(SERVER_KNOBS->DD_ZERO_HEALTHY_TEAM_DELAY, TaskPriority::DataDistribution) : Never()) ) {
self->doBuildTeams = true; self->doBuildTeams = true;
wait( DDTeamCollection::checkBuildTeams(self) ); wait( DDTeamCollection::checkBuildTeams(self) );
} }
@ -4174,9 +4174,21 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
.detail( "InFlight", 0 ) .detail( "InFlight", 0 )
.detail( "InQueue", 0 ) .detail( "InQueue", 0 )
.detail( "AverageShardSize", -1 ) .detail( "AverageShardSize", -1 )
.detail( "LowPriorityRelocations", 0 ) .detail( "UnhealthyRelocations", 0 )
.detail( "HighPriorityRelocations", 0 )
.detail( "HighestPriority", 0 ) .detail( "HighestPriority", 0 )
.detail( "BytesWritten", 0 )
.detail( "PriorityRecoverMove", 0 )
.detail( "PriorityRebalanceUnderutilizedTeam", 0 )
.detail( "PriorityRebalannceOverutilizedTeam", 0)
.detail( "PriorityTeamHealthy", 0 )
.detail( "PriorityTeamContainsUndesiredServer", 0 )
.detail( "PriorityTeamRedundant", 0 )
.detail( "PriorityMergeShard", 0 )
.detail( "PriorityTeamUnhealthy", 0 )
.detail( "PriorityTeam2Left", 0 )
.detail( "PriorityTeam1Left", 0 )
.detail( "PriorityTeam0Left", 0 )
.detail( "PrioritySplitShard", 0 )
.trackLatest( "MovingData" ); .trackLatest( "MovingData" );
TraceEvent("TotalDataInFlight", self->ddId).detail("Primary", true).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", 0).trackLatest("TotalDataInFlight"); TraceEvent("TotalDataInFlight", self->ddId).detail("Primary", true).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", 0).trackLatest("TotalDataInFlight");
@ -4219,7 +4231,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
if (!unhealthy && configuration.usableRegions > 1) { if (!unhealthy && configuration.usableRegions > 1) {
unhealthy = initData->shards[shard].remoteSrc.size() != configuration.storageTeamSize; unhealthy = initData->shards[shard].remoteSrc.size() != configuration.storageTeamSize;
} }
output.send( RelocateShard( keys, unhealthy ? PRIORITY_TEAM_UNHEALTHY : PRIORITY_RECOVER_MOVE ) ); output.send( RelocateShard( keys, unhealthy ? SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY : SERVER_KNOBS->PRIORITY_RECOVER_MOVE ) );
} }
wait( yield(TaskPriority::DataDistribution) ); wait( yield(TaskPriority::DataDistribution) );
} }
@ -38,33 +38,6 @@ struct RelocateShard {
RelocateShard( KeyRange const& keys, int priority ) : keys(keys), priority(priority) {} RelocateShard( KeyRange const& keys, int priority ) : keys(keys), priority(priority) {}
}; };
// Higher priorities are executed first
// Priority/100 is the "priority group"/"superpriority". Priority inversion
// is possible within but not between priority groups; fewer priority groups
// mean better worst case time bounds
enum {
PRIORITY_REBALANCE_SHARD = 100,
PRIORITY_RECOVER_MOVE = 110,
PRIORITY_REBALANCE_UNDERUTILIZED_TEAM = 120,
PRIORITY_REBALANCE_OVERUTILIZED_TEAM = 121,
PRIORITY_TEAM_HEALTHY = 140,
PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER = 150,
// Set removing_redundant_team priority lower than merge/split_shard_priority,
// so that removing redundant teams does not block merge/split shards.
PRIORITY_TEAM_REDUNDANT = 200,
PRIORITY_MERGE_SHARD = 340,
PRIORITY_SPLIT_SHARD = 350,
PRIORITY_TEAM_UNHEALTHY = 800,
PRIORITY_TEAM_2_LEFT = 809,
PRIORITY_TEAM_1_LEFT = 900,
PRIORITY_TEAM_0_LEFT = 999
};
enum { enum {
SOME_SHARED = 2, SOME_SHARED = 2,
NONE_SHARED = 3 NONE_SHARED = 3
@ -37,6 +37,9 @@
struct RelocateData { struct RelocateData {
KeyRange keys; KeyRange keys;
int priority; int priority;
int boundaryPriority;
int healthPriority;
double startTime; double startTime;
UID randomId; UID randomId;
int workFactor; int workFactor;
@ -45,34 +48,42 @@ struct RelocateData {
bool wantsNewServers; bool wantsNewServers;
TraceInterval interval; TraceInterval interval;
RelocateData() : startTime(-1), priority(-1), workFactor(0), wantsNewServers(false), interval("QueuedRelocation") {} RelocateData() : startTime(-1), priority(-1), boundaryPriority(-1), healthPriority(-1), workFactor(0), wantsNewServers(false), interval("QueuedRelocation") {}
RelocateData( RelocateShard const& rs ) : keys(rs.keys), priority(rs.priority), startTime(now()), randomId(deterministicRandom()->randomUniqueID()), workFactor(0), explicit RelocateData( RelocateShard const& rs ) : keys(rs.keys), priority(rs.priority), boundaryPriority(isBoundaryPriority(rs.priority) ? rs.priority : -1), healthPriority(isHealthPriority(rs.priority) ? rs.priority : -1), startTime(now()), randomId(deterministicRandom()->randomUniqueID()), workFactor(0),
wantsNewServers( wantsNewServers(
rs.priority == PRIORITY_REBALANCE_SHARD || rs.priority == SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM ||
rs.priority == PRIORITY_REBALANCE_OVERUTILIZED_TEAM || rs.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM ||
rs.priority == PRIORITY_REBALANCE_UNDERUTILIZED_TEAM || rs.priority == SERVER_KNOBS->PRIORITY_SPLIT_SHARD ||
rs.priority == PRIORITY_SPLIT_SHARD || rs.priority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT ||
rs.priority == PRIORITY_TEAM_REDUNDANT ||
mergeWantsNewServers(rs.keys, rs.priority)), interval("QueuedRelocation") {} mergeWantsNewServers(rs.keys, rs.priority)), interval("QueuedRelocation") {}
static bool mergeWantsNewServers(KeyRangeRef keys, int priority) { static bool mergeWantsNewServers(KeyRangeRef keys, int priority) {
return priority == PRIORITY_MERGE_SHARD && return priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD &&
(SERVER_KNOBS->MERGE_ONTO_NEW_TEAM == 2 || (SERVER_KNOBS->MERGE_ONTO_NEW_TEAM == 2 ||
(SERVER_KNOBS->MERGE_ONTO_NEW_TEAM == 1 && keys.begin.startsWith(LiteralStringRef("\xff")))); (SERVER_KNOBS->MERGE_ONTO_NEW_TEAM == 1 && keys.begin.startsWith(LiteralStringRef("\xff"))));
} }
static bool isHealthPriority(int priority) {
return priority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY ||
priority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT ||
priority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT ||
priority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT ||
priority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT ||
priority == SERVER_KNOBS->PRIORITY_TEAM_HEALTHY ||
priority == SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER;
}
static bool isBoundaryPriority(int priority) {
return priority == SERVER_KNOBS->PRIORITY_SPLIT_SHARD ||
priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD;
}
bool operator> (const RelocateData& rhs) const { bool operator> (const RelocateData& rhs) const {
return priority != rhs.priority ? priority > rhs.priority : ( startTime != rhs.startTime ? startTime < rhs.startTime : randomId > rhs.randomId ); return priority != rhs.priority ? priority > rhs.priority : ( startTime != rhs.startTime ? startTime < rhs.startTime : randomId > rhs.randomId );
} }
bool operator== (const RelocateData& rhs) const { bool operator== (const RelocateData& rhs) const {
return priority == rhs.priority && keys == rhs.keys && startTime == rhs.startTime && workFactor == rhs.workFactor && src == rhs.src && completeSources == rhs.completeSources && wantsNewServers == rhs.wantsNewServers && randomId == rhs.randomId; return priority == rhs.priority && boundaryPriority == rhs.boundaryPriority && healthPriority == rhs.healthPriority && keys == rhs.keys && startTime == rhs.startTime && workFactor == rhs.workFactor && src == rhs.src && completeSources == rhs.completeSources && wantsNewServers == rhs.wantsNewServers && randomId == rhs.randomId;
}
bool changesBoundaries() {
return priority == PRIORITY_MERGE_SHARD ||
priority == PRIORITY_SPLIT_SHARD ||
priority == PRIORITY_RECOVER_MOVE;
} }
}; };
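RelocateData now classifies its priority once, at construction: a health priority (team state) and a boundary priority (split/merge) are cached separately, so that later merging of overlapping requests can combine them independently. A standalone sketch of that classification, using this commit's default knob values:

    // Default knob values from this commit, for illustration only.
    enum : int { RECOVER_MOVE = 110, REBALANCE_UNDER = 120, REBALANCE_OVER = 121,
                 TEAM_HEALTHY = 140, TEAM_UNDESIRED = 150, TEAM_REDUNDANT = 200,
                 MERGE_SHARD = 340, TEAM_UNHEALTHY = 700, TEAM_2_LEFT = 709,
                 TEAM_1_LEFT = 800, TEAM_0_LEFT = 809, SPLIT_SHARD = 900 };

    bool isHealthPriority(int p) {
        return p == TEAM_UNHEALTHY || p == TEAM_2_LEFT || p == TEAM_1_LEFT ||
               p == TEAM_0_LEFT || p == TEAM_REDUNDANT || p == TEAM_HEALTHY ||
               p == TEAM_UNDESIRED;
    }
    bool isBoundaryPriority(int p) { return p == SPLIT_SHARD || p == MERGE_SHARD; }

    struct Relocation {
        int priority, boundaryPriority, healthPriority;
        explicit Relocation(int p)
          : priority(p),
            boundaryPriority(isBoundaryPriority(p) ? p : -1),
            healthPriority(isHealthPriority(p) ? p : -1) {}
    };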
@ -285,9 +296,9 @@ int getWorkFactor( RelocateData const& relocation ) {
// Avoid the divide by 0! // Avoid the divide by 0!
ASSERT( relocation.src.size() ); ASSERT( relocation.src.size() );
if( relocation.priority >= PRIORITY_TEAM_1_LEFT ) if( relocation.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || relocation.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT )
return WORK_FULL_UTILIZATION / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER; return WORK_FULL_UTILIZATION / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
else if( relocation.priority >= PRIORITY_TEAM_2_LEFT ) else if( relocation.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT )
return WORK_FULL_UTILIZATION / 2 / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER; return WORK_FULL_UTILIZATION / 2 / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
else // for now, any relocation at a lower priority can be assumed to have a full team left for work else // for now, any relocation at a lower priority can be assumed to have a full team left for work
return WORK_FULL_UTILIZATION / relocation.src.size() / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER; return WORK_FULL_UTILIZATION / relocation.src.size() / SERVER_KNOBS->RELOCATION_PARALLELISM_PER_SOURCE_SERVER;
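getWorkFactor now keys off the cached health priority rather than the numeric thresholds the old >= comparisons relied on. A sketch of the resulting budget calculation (WORK_FULL_UTILIZATION and the parallelism knob are stand-ins; the value is assumed for illustration):

    // A relocation's work factor is the share of a source server's relocation
    // budget it consumes; nearly-dead teams get the whole budget.
    int getWorkFactor(int healthPriority, int srcCount,
                      int relocationParallelismPerSourceServer) {
        const int WORK_FULL_UTILIZATION = 10000; // assumed illustrative constant
        if (healthPriority == 800 /*TEAM_1_LEFT*/ || healthPriority == 809 /*TEAM_0_LEFT*/)
            return WORK_FULL_UTILIZATION / relocationParallelismPerSourceServer;
        if (healthPriority == 709 /*TEAM_2_LEFT*/)
            return WORK_FULL_UTILIZATION / 2 / relocationParallelismPerSourceServer;
        // Otherwise assume a full team remains, so the work spreads across sources.
        return WORK_FULL_UTILIZATION / srcCount / relocationParallelismPerSourceServer;
    }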
@ -384,20 +395,22 @@ struct DDQueueData {
std::map<int, int> priority_relocations; std::map<int, int> priority_relocations;
int unhealthyRelocations; int unhealthyRelocations;
void startRelocation(int priority) { void startRelocation(int priority, int healthPriority) {
// Although PRIORITY_TEAM_REDUNDANT has lower priority than split and merge shard movement, // Although PRIORITY_TEAM_REDUNDANT has lower priority than split and merge shard movement,
// we must count it into unhealthyRelocations, because team removers rely on unhealthyRelocations to // we must count it into unhealthyRelocations, because team removers rely on unhealthyRelocations to
// ensure a team remover will not start before the previous one finishes removing a team and moving away its data // ensure a team remover will not start before the previous one finishes removing a team and moving away its data
// NOTE: split and merge shard have higher priority. If they have to wait for unhealthyRelocations = 0, // NOTE: split and merge shard have higher priority. If they have to wait for unhealthyRelocations = 0,
// deadlock may happen: split/merge shard waits for unhealthyRelocations, while blocking team_redundant. // deadlock may happen: split/merge shard waits for unhealthyRelocations, while blocking team_redundant.
if (priority >= PRIORITY_TEAM_UNHEALTHY || priority == PRIORITY_TEAM_REDUNDANT) { if (healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT ||
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT || healthPriority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT) {
unhealthyRelocations++; unhealthyRelocations++;
rawProcessingUnhealthy->set(true); rawProcessingUnhealthy->set(true);
} }
priority_relocations[priority]++; priority_relocations[priority]++;
} }
void finishRelocation(int priority) { void finishRelocation(int priority, int healthPriority) {
if (priority >= PRIORITY_TEAM_UNHEALTHY || priority == PRIORITY_TEAM_REDUNDANT) { if (healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT ||
healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT || healthPriority == SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT) {
unhealthyRelocations--; unhealthyRelocations--;
ASSERT(unhealthyRelocations >= 0); ASSERT(unhealthyRelocations >= 0);
if(unhealthyRelocations == 0) { if(unhealthyRelocations == 0) {
@ -524,7 +537,7 @@ struct DDQueueData {
state Transaction tr(cx); state Transaction tr(cx);
// FIXME: is the merge case needed // FIXME: is the merge case needed
if( input.priority == PRIORITY_MERGE_SHARD ) { if( input.priority == SERVER_KNOBS->PRIORITY_MERGE_SHARD ) {
wait( delay( 0.5, decrementPriority(decrementPriority(TaskPriority::DataDistribution )) ) ); wait( delay( 0.5, decrementPriority(decrementPriority(TaskPriority::DataDistribution )) ) );
} else { } else {
wait( delay( 0.0001, TaskPriority::DataDistributionLaunch ) ); wait( delay( 0.0001, TaskPriority::DataDistributionLaunch ) );
@ -586,10 +599,14 @@ struct DDQueueData {
} }
//This function cannot handle relocation requests which split a shard into three pieces //This function cannot handle relocation requests which split a shard into three pieces
void queueRelocation( RelocateData rd, std::set<UID> &serversToLaunchFrom ) { void queueRelocation( RelocateShard rs, std::set<UID> &serversToLaunchFrom ) {
//TraceEvent("QueueRelocationBegin").detail("Begin", rd.keys.begin).detail("End", rd.keys.end); //TraceEvent("QueueRelocationBegin").detail("Begin", rd.keys.begin).detail("End", rd.keys.end);
// remove all items from both queues that are fully contained in the new relocation (i.e. will be overwritten) // remove all items from both queues that are fully contained in the new relocation (i.e. will be overwritten)
RelocateData rd(rs);
bool hasHealthPriority = RelocateData::isHealthPriority( rd.priority );
bool hasBoundaryPriority = RelocateData::isBoundaryPriority( rd.priority );
auto ranges = queueMap.intersectingRanges( rd.keys ); auto ranges = queueMap.intersectingRanges( rd.keys );
for(auto r = ranges.begin(); r != ranges.end(); ++r ) { for(auto r = ranges.begin(); r != ranges.end(); ++r ) {
RelocateData& rrs = r->value(); RelocateData& rrs = r->value();
@ -611,9 +628,13 @@ struct DDQueueData {
if( foundActiveFetching || foundActiveRelocation ) { if( foundActiveFetching || foundActiveRelocation ) {
rd.wantsNewServers |= rrs.wantsNewServers; rd.wantsNewServers |= rrs.wantsNewServers;
rd.startTime = std::min( rd.startTime, rrs.startTime ); rd.startTime = std::min( rd.startTime, rrs.startTime );
if ((rrs.priority >= PRIORITY_TEAM_UNHEALTHY || rrs.priority == PRIORITY_TEAM_REDUNDANT) && if(!hasHealthPriority) {
rd.changesBoundaries()) rd.healthPriority = std::max(rd.healthPriority, rrs.healthPriority);
rd.priority = std::max( rd.priority, rrs.priority ); }
if(!hasBoundaryPriority) {
rd.boundaryPriority = std::max(rd.boundaryPriority, rrs.boundaryPriority);
}
rd.priority = std::max(rd.priority, std::max(rd.boundaryPriority, rd.healthPriority));
} }
if( rd.keys.contains( rrs.keys ) ) { if( rd.keys.contains( rrs.keys ) ) {
@ -631,7 +652,7 @@ struct DDQueueData {
/*TraceEvent(rrs.interval.end(), mi.id()).detail("Result","Cancelled") /*TraceEvent(rrs.interval.end(), mi.id()).detail("Result","Cancelled")
.detail("WasFetching", foundActiveFetching).detail("Contained", rd.keys.contains( rrs.keys ));*/ .detail("WasFetching", foundActiveFetching).detail("Contained", rd.keys.contains( rrs.keys ));*/
queuedRelocations--; queuedRelocations--;
finishRelocation(rrs.priority); finishRelocation(rrs.priority, rrs.healthPriority);
} }
} }
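The merge rule in queueRelocation above replaces the old "bump priority if the overlapped move was unhealthy" special case: the incoming request inherits the strongest overlapping health and boundary priorities, but only for the kinds it does not already carry, and its effective priority becomes the maximum of the three. A compact sketch:

    #include <algorithm>

    struct Rd { int priority = -1, boundaryPriority = -1, healthPriority = -1; };

    // rd is the newly queued relocation; rrs is an overlapping active one.
    void inheritPriorities(Rd& rd, const Rd& rrs,
                           bool hasHealthPriority, bool hasBoundaryPriority) {
        if (!hasHealthPriority)
            rd.healthPriority = std::max(rd.healthPriority, rrs.healthPriority);
        if (!hasBoundaryPriority)
            rd.boundaryPriority = std::max(rd.boundaryPriority, rrs.boundaryPriority);
        rd.priority = std::max(rd.priority,
                               std::max(rd.boundaryPriority, rd.healthPriority));
    }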
@ -658,7 +679,7 @@ struct DDQueueData {
.detail("KeyBegin", rrs.keys.begin).detail("KeyEnd", rrs.keys.end) .detail("KeyBegin", rrs.keys.begin).detail("KeyEnd", rrs.keys.end)
.detail("Priority", rrs.priority).detail("WantsNewServers", rrs.wantsNewServers);*/ .detail("Priority", rrs.priority).detail("WantsNewServers", rrs.wantsNewServers);*/
queuedRelocations++; queuedRelocations++;
startRelocation(rrs.priority); startRelocation(rrs.priority, rrs.healthPriority);
fetchingSourcesQueue.insert( rrs ); fetchingSourcesQueue.insert( rrs );
getSourceActors.insert( rrs.keys, getSourceServersForRange( cx, rrs, fetchSourceServersComplete ) ); getSourceActors.insert( rrs.keys, getSourceServersForRange( cx, rrs, fetchSourceServersComplete ) );
@ -678,7 +699,7 @@ struct DDQueueData {
.detail("KeyBegin", newData.keys.begin).detail("KeyEnd", newData.keys.end) .detail("KeyBegin", newData.keys.begin).detail("KeyEnd", newData.keys.end)
.detail("Priority", newData.priority).detail("WantsNewServers", newData.wantsNewServers);*/ .detail("Priority", newData.priority).detail("WantsNewServers", newData.wantsNewServers);*/
queuedRelocations++; queuedRelocations++;
startRelocation(newData.priority); startRelocation(newData.priority, newData.healthPriority);
foundActiveRelocation = true; foundActiveRelocation = true;
} }
@ -773,7 +794,7 @@ struct DDQueueData {
for(auto it = intersectingInFlight.begin(); it != intersectingInFlight.end(); ++it) { for(auto it = intersectingInFlight.begin(); it != intersectingInFlight.end(); ++it) {
if (fetchKeysComplete.count(it->value()) && inFlightActors.liveActorAt(it->range().begin) && if (fetchKeysComplete.count(it->value()) && inFlightActors.liveActorAt(it->range().begin) &&
!rd.keys.contains(it->range()) && it->value().priority >= rd.priority && !rd.keys.contains(it->range()) && it->value().priority >= rd.priority &&
rd.priority < PRIORITY_TEAM_UNHEALTHY) { rd.healthPriority < SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) {
/*TraceEvent("OverlappingInFlight", distributorId) /*TraceEvent("OverlappingInFlight", distributorId)
.detail("KeyBegin", it->value().keys.begin) .detail("KeyBegin", it->value().keys.begin)
.detail("KeyEnd", it->value().keys.end) .detail("KeyEnd", it->value().keys.end)
@ -813,7 +834,7 @@ struct DDQueueData {
//TraceEvent(rd.interval.end(), distributorId).detail("Result","Success"); //TraceEvent(rd.interval.end(), distributorId).detail("Result","Success");
queuedRelocations--; queuedRelocations--;
finishRelocation(rd.priority); finishRelocation(rd.priority, rd.healthPriority);
// now we are launching: remove this entry from the queue of all the src servers // now we are launching: remove this entry from the queue of all the src servers
for( int i = 0; i < rd.src.size(); i++ ) { for( int i = 0; i < rd.src.size(); i++ ) {
@ -841,7 +862,7 @@ struct DDQueueData {
launch( rrs, busymap ); launch( rrs, busymap );
activeRelocations++; activeRelocations++;
startRelocation(rrs.priority); startRelocation(rrs.priority, rrs.healthPriority);
inFlightActors.insert( rrs.keys, dataDistributionRelocator( this, rrs ) ); inFlightActors.insert( rrs.keys, dataDistributionRelocator( this, rrs ) );
} }
@ -912,10 +933,10 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
bestTeams.clear(); bestTeams.clear();
while( tciIndex < self->teamCollections.size() ) { while( tciIndex < self->teamCollections.size() ) {
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY; double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY; if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT; if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, inflightPenalty); auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, inflightPenalty);
req.sources = rd.src; req.sources = rd.src;
req.completeSources = rd.completeSources; req.completeSources = rd.completeSources;
Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req))); Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req)));
@ -1154,7 +1175,7 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) ); std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
for( int i = 0; i < shards.size(); i++ ) { for( int i = 0; i < shards.size(); i++ ) {
if( moveShard == shards[i] ) { if( moveShard == shards[i] ) {
TraceEvent(priority == PRIORITY_REBALANCE_OVERUTILIZED_TEAM ? "BgDDMountainChopper" : "BgDDValleyFiller", self->distributorId) TraceEvent(priority == SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM ? "BgDDMountainChopper" : "BgDDValleyFiller", self->distributorId)
.detail("SourceBytes", sourceBytes) .detail("SourceBytes", sourceBytes)
.detail("DestBytes", destBytes) .detail("DestBytes", destBytes)
.detail("ShardBytes", metrics.bytes) .detail("ShardBytes", metrics.bytes)
@ -1197,7 +1218,7 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL); std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
continue; continue;
} }
if (self->priority_relocations[PRIORITY_REBALANCE_OVERUTILIZED_TEAM] < if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM] <
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) { SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever( state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true)))); self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true))));
@ -1208,7 +1229,7 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
GetTeamRequest(true, true, false)))); GetTeamRequest(true, true, false))));
if (loadedTeam.present()) { if (loadedTeam.present()) {
bool moved = bool moved =
wait(rebalanceTeams(self, PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(), wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(),
randomTeam.get(), teamCollectionIndex == 0)); randomTeam.get(), teamCollectionIndex == 0));
if (moved) { if (moved) {
resetCount = 0; resetCount = 0;
@ -1266,7 +1287,7 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL); std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
continue; continue;
} }
if (self->priority_relocations[PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] < if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] <
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) { SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever( state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false)))); self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false))));
@ -1276,7 +1297,7 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
if (unloadedTeam.present()) { if (unloadedTeam.present()) {
if (unloadedTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF) { if (unloadedTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF) {
bool moved = bool moved =
wait(rebalanceTeams(self, PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(), wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(),
unloadedTeam.get(), teamCollectionIndex == 0)); unloadedTeam.get(), teamCollectionIndex == 0));
if (moved) { if (moved) {
resetCount = 0; resetCount = 0;
@ -1382,7 +1403,7 @@ ACTOR Future<Void> dataDistributionQueue(
} }
when ( RelocateData done = waitNext( self.relocationComplete.getFuture() ) ) { when ( RelocateData done = waitNext( self.relocationComplete.getFuture() ) ) {
self.activeRelocations--; self.activeRelocations--;
self.finishRelocation(done.priority); self.finishRelocation(done.priority, done.healthPriority);
self.fetchKeysComplete.erase( done ); self.fetchKeysComplete.erase( done );
//self.logRelocation( done, "ShardRelocatorDone" ); //self.logRelocation( done, "ShardRelocatorDone" );
actors.add( tag( delay(0, TaskPriority::DataDistributionLaunch), done.keys, rangesComplete ) ); actors.add( tag( delay(0, TaskPriority::DataDistributionLaunch), done.keys, rangesComplete ) );
@ -1400,24 +1421,32 @@ ACTOR Future<Void> dataDistributionQueue(
recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL); recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL);
int lowPriorityRelocations = 0, highPriorityRelocations = 0, highestPriorityRelocation = 0; int highestPriorityRelocation = 0;
for( auto it = self.priority_relocations.begin(); it != self.priority_relocations.end(); ++it ) { for( auto it = self.priority_relocations.begin(); it != self.priority_relocations.end(); ++it ) {
if (it->second) if (it->second) {
highestPriorityRelocation = std::max(highestPriorityRelocation, it->first); highestPriorityRelocation = std::max(highestPriorityRelocation, it->first);
if( it->first < 200 ) }
lowPriorityRelocations += it->second;
else
highPriorityRelocations += it->second;
} }
TraceEvent("MovingData", distributorId) TraceEvent("MovingData", distributorId)
.detail( "InFlight", self.activeRelocations ) .detail( "InFlight", self.activeRelocations )
.detail( "InQueue", self.queuedRelocations ) .detail( "InQueue", self.queuedRelocations )
.detail( "AverageShardSize", req.getFuture().isReady() ? req.getFuture().get() : -1 ) .detail( "AverageShardSize", req.getFuture().isReady() ? req.getFuture().get() : -1 )
.detail( "LowPriorityRelocations", lowPriorityRelocations ) .detail( "UnhealthyRelocations", self.unhealthyRelocations )
.detail( "HighPriorityRelocations", highPriorityRelocations )
.detail( "HighestPriority", highestPriorityRelocation ) .detail( "HighestPriority", highestPriorityRelocation )
.detail( "BytesWritten", self.bytesWritten ) .detail( "BytesWritten", self.bytesWritten )
.detail( "PriorityRecoverMove", self.priority_relocations[SERVER_KNOBS->PRIORITY_RECOVER_MOVE] )
.detail( "PriorityRebalanceUnderutilizedTeam", self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] )
.detail( "PriorityRebalannceOverutilizedTeam", self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM] )
.detail( "PriorityTeamHealthy", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_HEALTHY] )
.detail( "PriorityTeamContainsUndesiredServer", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER] )
.detail( "PriorityTeamRedundant", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT] )
.detail( "PriorityMergeShard", self.priority_relocations[SERVER_KNOBS->PRIORITY_MERGE_SHARD] )
.detail( "PriorityTeamUnhealthy", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY] )
.detail( "PriorityTeam2Left", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_2_LEFT] )
.detail( "PriorityTeam1Left", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_1_LEFT] )
.detail( "PriorityTeam0Left", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_0_LEFT] )
.detail( "PrioritySplitShard", self.priority_relocations[SERVER_KNOBS->PRIORITY_SPLIT_SHARD] )
.trackLatest( "MovingData" ); .trackLatest( "MovingData" );
} }
when ( wait( self.error.getFuture() ) ) {} // Propagate errors from dataDistributionRelocator when ( wait( self.error.getFuture() ) ) {} // Propagate errors from dataDistributionRelocator
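With the coarse low/high buckets gone, the MovingData event now reports one detail per priority class plus the unhealthyRelocations gauge; only the highest active priority still requires a scan. A sketch of that scan over the priority-to-count map:

    #include <algorithm>
    #include <map>

    // priority_relocations maps a priority value to its number of queued moves.
    int highestActivePriority(const std::map<int, int>& priorityRelocations) {
        int highest = 0;
        for (const auto& pr : priorityRelocations)
            if (pr.second > 0) highest = std::max(highest, pr.first);
        return highest;
    }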
@ -69,6 +69,7 @@ struct DataDistributionTracker {
KeyRangeMap< ShardTrackedData > shards; KeyRangeMap< ShardTrackedData > shards;
ActorCollection sizeChanges; ActorCollection sizeChanges;
int64_t systemSizeEstimate;
Reference<AsyncVar<int64_t>> dbSizeEstimate; Reference<AsyncVar<int64_t>> dbSizeEstimate;
Reference<AsyncVar<Optional<int64_t>>> maxShardSize; Reference<AsyncVar<Optional<int64_t>>> maxShardSize;
Future<Void> maxShardSizeUpdater; Future<Void> maxShardSizeUpdater;
@ -81,7 +82,7 @@ struct DataDistributionTracker {
Reference<AsyncVar<bool>> anyZeroHealthyTeams; Reference<AsyncVar<bool>> anyZeroHealthyTeams;
DataDistributionTracker(Database cx, UID distributorId, Promise<Void> const& readyToStart, PromiseStream<RelocateShard> const& output, Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure, Reference<AsyncVar<bool>> anyZeroHealthyTeams) DataDistributionTracker(Database cx, UID distributorId, Promise<Void> const& readyToStart, PromiseStream<RelocateShard> const& output, Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure, Reference<AsyncVar<bool>> anyZeroHealthyTeams)
: cx(cx), distributorId( distributorId ), dbSizeEstimate( new AsyncVar<int64_t>() ), : cx(cx), distributorId( distributorId ), dbSizeEstimate( new AsyncVar<int64_t>() ), systemSizeEstimate(0),
maxShardSize( new AsyncVar<Optional<int64_t>>() ), maxShardSize( new AsyncVar<Optional<int64_t>>() ),
sizeChanges(false), readyToStart(readyToStart), output( output ), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams) {} sizeChanges(false), readyToStart(readyToStart), output( output ), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams) {}
@ -138,8 +139,7 @@ int64_t getMaxShardSize( double dbSizeEstimate ) {
ACTOR Future<Void> trackShardBytes( ACTOR Future<Void> trackShardBytes(
DataDistributionTracker* self, DataDistributionTracker* self,
KeyRange keys, KeyRange keys,
Reference<AsyncVar<Optional<StorageMetrics>>> shardSize, Reference<AsyncVar<Optional<StorageMetrics>>> shardSize)
bool addToSizeEstimate = true)
{ {
wait( delay( 0, TaskPriority::DataDistribution ) ); wait( delay( 0, TaskPriority::DataDistribution ) );
@ -203,8 +203,12 @@ ACTOR Future<Void> trackShardBytes(
.detail("OldShardSize", shardSize->get().present() ? shardSize->get().get().metrics.bytes : 0) .detail("OldShardSize", shardSize->get().present() ? shardSize->get().get().metrics.bytes : 0)
.detail("TrackerID", trackerID);*/ .detail("TrackerID", trackerID);*/
if( shardSize->get().present() && addToSizeEstimate ) if( shardSize->get().present() ) {
self->dbSizeEstimate->set( self->dbSizeEstimate->get() + metrics.bytes - shardSize->get().get().bytes ); self->dbSizeEstimate->set( self->dbSizeEstimate->get() + metrics.bytes - shardSize->get().get().bytes );
if(keys.begin >= systemKeys.begin) {
self->systemSizeEstimate += metrics.bytes - shardSize->get().get().bytes;
}
}
shardSize->set( metrics ); shardSize->set( metrics );
} }
@ -254,10 +258,15 @@ ACTOR Future<int64_t> getFirstSize( Reference<AsyncVar<Optional<StorageMetrics>>
} }
} }
ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRangeRef keys, int64_t oldShardsEndingSize ) { ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRange keys, int64_t oldShardsEndingSize ) {
state vector<Future<int64_t>> sizes; state vector<Future<int64_t>> sizes;
state vector<Future<int64_t>> systemSizes;
for (auto it : self->shards.intersectingRanges(keys) ) { for (auto it : self->shards.intersectingRanges(keys) ) {
sizes.push_back( getFirstSize( it->value().stats ) ); Future<int64_t> thisSize = getFirstSize( it->value().stats );
sizes.push_back( thisSize );
if(it->range().begin >= systemKeys.begin) {
systemSizes.push_back( thisSize );
}
} }
wait( waitForAll( sizes ) ); wait( waitForAll( sizes ) );
@ -267,12 +276,20 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRangeRef keys,
for ( int i = 0; i < sizes.size(); i++ ) for ( int i = 0; i < sizes.size(); i++ )
newShardsStartingSize += sizes[i].get(); newShardsStartingSize += sizes[i].get();
int64_t newSystemShardsStartingSize = 0;
for ( int i = 0; i < systemSizes.size(); i++ )
newSystemShardsStartingSize += systemSizes[i].get();
int64_t totalSizeEstimate = self->dbSizeEstimate->get(); int64_t totalSizeEstimate = self->dbSizeEstimate->get();
/*TraceEvent("TrackerChangeSizes") /*TraceEvent("TrackerChangeSizes")
.detail("TotalSizeEstimate", totalSizeEstimate) .detail("TotalSizeEstimate", totalSizeEstimate)
.detail("EndSizeOfOldShards", oldShardsEndingSize) .detail("EndSizeOfOldShards", oldShardsEndingSize)
.detail("StartingSizeOfNewShards", newShardsStartingSize);*/ .detail("StartingSizeOfNewShards", newShardsStartingSize);*/
self->dbSizeEstimate->set( totalSizeEstimate + newShardsStartingSize - oldShardsEndingSize ); self->dbSizeEstimate->set( totalSizeEstimate + newShardsStartingSize - oldShardsEndingSize );
self->systemSizeEstimate += newSystemShardsStartingSize;
if(keys.begin >= systemKeys.begin) {
self->systemSizeEstimate -= oldShardsEndingSize;
}
return Void(); return Void();
} }
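The tracker now maintains a second estimate for the system keyspace alongside dbSizeEstimate: any shard whose begin key is at or past the \xff boundary (systemKeys.begin) contributes to both. A minimal sketch of the bookkeeping, assuming keys are plain byte strings:

    #include <cstdint>
    #include <string>

    struct SizeEstimates {
        int64_t total = 0;   // analogue of dbSizeEstimate
        int64_t system = 0;  // analogue of systemSizeEstimate

        void applyShardDelta(const std::string& shardBegin, int64_t deltaBytes) {
            total += deltaBytes;
            if (shardBegin >= "\xff") // shard lies in the system keyspace
                system += deltaBytes;
        }
    };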
@ -352,12 +369,12 @@ ACTOR Future<Void> shardSplitter(
for( int i = 0; i < skipRange; i++ ) { for( int i = 0; i < skipRange; i++ ) {
KeyRangeRef r(splitKeys[i], splitKeys[i+1]); KeyRangeRef r(splitKeys[i], splitKeys[i+1]);
self->shardsAffectedByTeamFailure->defineShard( r ); self->shardsAffectedByTeamFailure->defineShard( r );
self->output.send( RelocateShard( r, PRIORITY_SPLIT_SHARD) ); self->output.send( RelocateShard( r, SERVER_KNOBS->PRIORITY_SPLIT_SHARD) );
} }
for( int i = numShards-1; i > skipRange; i-- ) { for( int i = numShards-1; i > skipRange; i-- ) {
KeyRangeRef r(splitKeys[i], splitKeys[i+1]); KeyRangeRef r(splitKeys[i], splitKeys[i+1]);
self->shardsAffectedByTeamFailure->defineShard( r ); self->shardsAffectedByTeamFailure->defineShard( r );
self->output.send( RelocateShard( r, PRIORITY_SPLIT_SHARD) ); self->output.send( RelocateShard( r, SERVER_KNOBS->PRIORITY_SPLIT_SHARD) );
} }
self->sizeChanges.add( changeSizes( self, keys, shardSize->get().get().bytes ) ); self->sizeChanges.add( changeSizes( self, keys, shardSize->get().get().bytes ) );
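shardSplitter keeps one of the new sub-ranges (skipRange) on the existing team and queues relocations for the rest, walking outward from it in both directions. A sketch of that fan-out; how skipRange itself is chosen is outside this hunk:

    #include <cstdio>
    #include <string>
    #include <vector>

    // splitKeys holds numShards+1 boundaries; the sub-shard at index skipRange
    // keeps its data where it is, so no relocation is queued for it.
    void queueSplitRelocations(const std::vector<std::string>& splitKeys,
                               int skipRange, int splitPriority) {
        int numShards = (int)splitKeys.size() - 1;
        for (int i = 0; i < skipRange; i++)
            std::printf("relocate [%s,%s) prio %d\n", splitKeys[i].c_str(),
                        splitKeys[i + 1].c_str(), splitPriority);
        for (int i = numShards - 1; i > skipRange; i--)
            std::printf("relocate [%s,%s) prio %d\n", splitKeys[i].c_str(),
                        splitKeys[i + 1].c_str(), splitPriority);
    }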
@ -458,7 +475,7 @@ Future<Void> shardMerger(
restartShardTrackers( self, mergeRange, endingStats ); restartShardTrackers( self, mergeRange, endingStats );
self->shardsAffectedByTeamFailure->defineShard( mergeRange ); self->shardsAffectedByTeamFailure->defineShard( mergeRange );
self->output.send( RelocateShard( mergeRange, PRIORITY_MERGE_SHARD ) ); self->output.send( RelocateShard( mergeRange, SERVER_KNOBS->PRIORITY_MERGE_SHARD ) );
// We are about to be cancelled by the call to restartShardTrackers // We are about to be cancelled by the call to restartShardTrackers
return Void(); return Void();
@ -641,7 +658,7 @@ ACTOR Future<Void> fetchShardMetrics_impl( DataDistributionTracker* self, GetMet
ACTOR Future<Void> fetchShardMetrics( DataDistributionTracker* self, GetMetricsRequest req ) { ACTOR Future<Void> fetchShardMetrics( DataDistributionTracker* self, GetMetricsRequest req ) {
choose { choose {
when( wait( fetchShardMetrics_impl( self, req ) ) ) {} when( wait( fetchShardMetrics_impl( self, req ) ) ) {}
when( wait( delay( SERVER_KNOBS->DD_SHARD_METRICS_TIMEOUT ) ) ) { when( wait( delay( SERVER_KNOBS->DD_SHARD_METRICS_TIMEOUT, TaskPriority::DataDistribution ) ) ) {
TEST(true); // DD_SHARD_METRICS_TIMEOUT TEST(true); // DD_SHARD_METRICS_TIMEOUT
StorageMetrics largeMetrics; StorageMetrics largeMetrics;
largeMetrics.bytes = SERVER_KNOBS->MAX_SHARD_BYTES; largeMetrics.bytes = SERVER_KNOBS->MAX_SHARD_BYTES;
@ -676,6 +693,7 @@ ACTOR Future<Void> dataDistributionTracker(
TraceEvent("DDTrackerStats", self.distributorId) TraceEvent("DDTrackerStats", self.distributorId)
.detail("Shards", self.shards.size()) .detail("Shards", self.shards.size())
.detail("TotalSizeBytes", self.dbSizeEstimate->get()) .detail("TotalSizeBytes", self.dbSizeEstimate->get())
.detail("SystemSizeBytes", self.systemSizeEstimate)
.trackLatest( "DDTrackerStats" ); .trackLatest( "DDTrackerStats" );
loggingTrigger = delay(SERVER_KNOBS->DATA_DISTRIBUTION_LOGGING_INTERVAL); loggingTrigger = delay(SERVER_KNOBS->DATA_DISTRIBUTION_LOGGING_INTERVAL);
@ -106,6 +106,19 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 ); init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );
init( MERGE_ONTO_NEW_TEAM, 1 ); if( randomize && BUGGIFY ) MERGE_ONTO_NEW_TEAM = deterministicRandom()->coinflip() ? 0 : 2; init( MERGE_ONTO_NEW_TEAM, 1 ); if( randomize && BUGGIFY ) MERGE_ONTO_NEW_TEAM = deterministicRandom()->coinflip() ? 0 : 2;
init( PRIORITY_RECOVER_MOVE, 110 );
init( PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, 120 );
init( PRIORITY_REBALANCE_OVERUTILIZED_TEAM, 121 );
init( PRIORITY_TEAM_HEALTHY, 140 );
init( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER, 150 );
init( PRIORITY_TEAM_REDUNDANT, 200 );
init( PRIORITY_MERGE_SHARD, 340 );
init( PRIORITY_TEAM_UNHEALTHY, 700 );
init( PRIORITY_TEAM_2_LEFT, 709 );
init( PRIORITY_TEAM_1_LEFT, 800 );
init( PRIORITY_TEAM_0_LEFT, 809 );
init( PRIORITY_SPLIT_SHARD, 900 ); if( randomize && BUGGIFY ) PRIORITY_SPLIT_SHARD = 350;
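Moving the relocation priorities from a compile-time enum into knobs lets simulation perturb them: under BUGGIFY, PRIORITY_SPLIT_SHARD can drop back to its old value of 350, below the unhealthy-team band, so both orderings get test coverage. The pattern in a self-contained analogue:

    struct Knobs {
        int PRIORITY_SPLIT_SHARD = 0;

        // Analogue of init(...) plus the BUGGIFY override above.
        void initialize(bool randomize, bool buggify) {
            PRIORITY_SPLIT_SHARD = 900;
            if (randomize && buggify)
                PRIORITY_SPLIT_SHARD = 350; // exercise the pre-change ordering
        }
    };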
// Data distribution // Data distribution
init( RETRY_RELOCATESHARD_DELAY, 0.1 ); init( RETRY_RELOCATESHARD_DELAY, 0.1 );
init( DATA_DISTRIBUTION_FAILURE_REACTION_TIME, 60.0 ); if( randomize && BUGGIFY ) DATA_DISTRIBUTION_FAILURE_REACTION_TIME = 1.0; init( DATA_DISTRIBUTION_FAILURE_REACTION_TIME, 60.0 ); if( randomize && BUGGIFY ) DATA_DISTRIBUTION_FAILURE_REACTION_TIME = 1.0;
@ -304,6 +317,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( ENFORCED_MIN_RECOVERY_DURATION, 0.085 ); if( shortRecoveryDuration ) ENFORCED_MIN_RECOVERY_DURATION = 0.01; init( ENFORCED_MIN_RECOVERY_DURATION, 0.085 ); if( shortRecoveryDuration ) ENFORCED_MIN_RECOVERY_DURATION = 0.01;
init( REQUIRED_MIN_RECOVERY_DURATION, 0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01; init( REQUIRED_MIN_RECOVERY_DURATION, 0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01;
init( ALWAYS_CAUSAL_READ_RISKY, false ); init( ALWAYS_CAUSAL_READ_RISKY, false );
init( MAX_COMMIT_UPDATES, 100000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
// Master Server // Master Server
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistribution) // masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistribution)
@ -485,6 +499,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 ); init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 );
init( MAX_STATUS_REQUESTS_PER_SECOND, 256.0 ); init( MAX_STATUS_REQUESTS_PER_SECOND, 256.0 );
init( CONFIGURATION_ROWS_TO_FETCH, 20000 ); init( CONFIGURATION_ROWS_TO_FETCH, 20000 );
init( DISABLE_DUPLICATE_LOG_WARNING, false );
// IPager // IPager
init( PAGER_RESERVED_PAGES, 1 ); init( PAGER_RESERVED_PAGES, 1 );
@ -106,6 +106,24 @@ public:
double INFLIGHT_PENALTY_ONE_LEFT; double INFLIGHT_PENALTY_ONE_LEFT;
int MERGE_ONTO_NEW_TEAM; // Merges will request new servers. 0 for off, 1 for \xff only, 2 for all shards. int MERGE_ONTO_NEW_TEAM; // Merges will request new servers. 0 for off, 1 for \xff only, 2 for all shards.
// Higher priorities are executed first
// Priority/100 is the "priority group"/"superpriority". Priority inversion
// is possible within but not between priority groups; fewer priority groups
// mean better worst case time bounds
// Maximum allowable priority is 999.
int PRIORITY_RECOVER_MOVE;
int PRIORITY_REBALANCE_UNDERUTILIZED_TEAM;
int PRIORITY_REBALANCE_OVERUTILIZED_TEAM;
int PRIORITY_TEAM_HEALTHY;
int PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER;
int PRIORITY_TEAM_REDUNDANT;
int PRIORITY_MERGE_SHARD;
int PRIORITY_TEAM_UNHEALTHY;
int PRIORITY_TEAM_2_LEFT;
int PRIORITY_TEAM_1_LEFT;
int PRIORITY_TEAM_0_LEFT;
int PRIORITY_SPLIT_SHARD;
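The comment above defines the priority group as priority/100: inversion is tolerated within a group but never across groups. With this commit's defaults, splits (900) sit above even PRIORITY_TEAM_0_LEFT (809), whereas the 2-left/unhealthy pair (709/700) share a group, as a quick check shows:

    constexpr int priorityGroup(int priority) { return priority / 100; }

    static_assert(priorityGroup(900) > priorityGroup(809),
                  "splits outrank 0-left moves across groups");
    static_assert(priorityGroup(709) == priorityGroup(700),
                  "2-left and unhealthy share a group, so inversion is possible");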
// Data distribution // Data distribution
double RETRY_RELOCATESHARD_DELAY; double RETRY_RELOCATESHARD_DELAY;
double DATA_DISTRIBUTION_FAILURE_REACTION_TIME; double DATA_DISTRIBUTION_FAILURE_REACTION_TIME;
@ -244,6 +262,7 @@ public:
double ENFORCED_MIN_RECOVERY_DURATION; double ENFORCED_MIN_RECOVERY_DURATION;
double REQUIRED_MIN_RECOVERY_DURATION; double REQUIRED_MIN_RECOVERY_DURATION;
bool ALWAYS_CAUSAL_READ_RISKY; bool ALWAYS_CAUSAL_READ_RISKY;
int MAX_COMMIT_UPDATES;
// Master Server // Master Server
double COMMIT_SLEEP_TIME; double COMMIT_SLEEP_TIME;
@ -423,6 +442,7 @@ public:
double STATUS_MIN_TIME_BETWEEN_REQUESTS; double STATUS_MIN_TIME_BETWEEN_REQUESTS;
double MAX_STATUS_REQUESTS_PER_SECOND; double MAX_STATUS_REQUESTS_PER_SECOND;
int CONFIGURATION_ROWS_TO_FETCH; int CONFIGURATION_ROWS_TO_FETCH;
bool DISABLE_DUPLICATE_LOG_WARNING;
// IPager // IPager
int PAGER_RESERVED_PAGES; int PAGER_RESERVED_PAGES;
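
The PRIORITY_* values move from compile-time constants into server knobs here; the call sites in Status.actor.cpp later in this diff switch to SERVER_KNOBS-> accordingly. A minimal sketch of the grouping rule described in the comment above, with illustrative numbers rather than the shipped defaults from Knobs.cpp:

// Illustrative only: Priority/100 is the "priority group"; inversion can
// occur inside a group, but a higher group always wins.
#include <cassert>

inline int priorityGroup(int priority) {
	assert(priority >= 0 && priority <= 999); // maximum allowable priority is 999
	return priority / 100;
}

inline bool strictlyPreempts(int a, int b) {
	return priorityGroup(a) > priorityGroup(b); // between-group ordering is strict
}

int main() {
	assert(priorityGroup(120) == priorityGroup(121)); // same group: inversion possible
	assert(strictlyPreempts(800, 340));               // e.g. a team-1-left move vs a merge
	return 0;
}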

View File

@@ -236,6 +236,7 @@ struct ProxyCommitData {
	Optional<LatencyBandConfig> latencyBandConfig;
	double lastStartCommit;
	double lastCommitLatency;
+	int updateCommitRequests = 0;
	NotifiedDouble lastCommitTime;

	// The tags related to a storage server rarely change, so we keep a vector of tags for each key range to be slightly more CPU efficient.
@@ -810,27 +811,32 @@ ACTOR Future<Void> commitBatch(
	// Serialize and backup the mutations as a single mutation
	if ((self->vecBackupKeys.size() > 1) && logRangeMutations.size()) {
+		state std::map<Key, MutationListRef>::iterator logRangeMutation = logRangeMutations.begin();
-		Key val;
-		MutationRef backupMutation;
-		uint32_t* partBuffer = NULL;

		// Serialize the log range mutations within the map
-		for (auto& logRangeMutation : logRangeMutations)
+		for (; logRangeMutation != logRangeMutations.end(); ++logRangeMutation)
		{
+			if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
+				yieldBytes = 0;
+				wait(yield());
+			}
+			yieldBytes += logRangeMutation->second.expectedSize();

			BinaryWriter wr(Unversioned());

			// Serialize the log destination
-			wr.serializeBytes( logRangeMutation.first );
+			wr.serializeBytes( logRangeMutation->first );

			// Write the log keys and version information
			wr << (uint8_t)hashlittle(&v, sizeof(v), 0);
			wr << bigEndian64(commitVersion);

+			MutationRef backupMutation;
			backupMutation.type = MutationRef::SetValue;
-			partBuffer = NULL;
+			uint32_t* partBuffer = NULL;
-			val = BinaryWriter::toValue(logRangeMutation.second, IncludeVersion());
+			Key val = BinaryWriter::toValue(logRangeMutation->second, IncludeVersion());

			for (int part = 0; part * CLIENT_KNOBS->MUTATION_BLOCK_SIZE < val.size(); part++) {
@@ -852,7 +858,7 @@ ACTOR Future<Void> commitBatch(
			// Define the mutation type and location
			backupMutation.param1 = wr.toValue();
-			ASSERT( backupMutation.param1.startsWith(logRangeMutation.first) );  // We are writing into the configured destination
+			ASSERT( backupMutation.param1.startsWith(logRangeMutation->first) );  // We are writing into the configured destination

			auto& tags = self->tagsForKey(backupMutation.param1);
			toCommit.addTags(tags);
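
The new yield logic bounds how many bytes of backup mutations get serialized between yields, so one very large commit batch cannot starve the proxy's event loop. A minimal sketch of the pattern with the Flow machinery replaced by plain callables (all names here are illustrative, not from the patch):

// Cooperative yielding driven by a byte budget rather than an item count,
// because items vary widely in size. `process` stands in for the
// serialization work; `yieldToScheduler` stands in for wait(yield()).
#include <cstddef>
#include <vector>

template <class Item, class ProcessFn, class YieldFn>
void processWithYields(const std::vector<Item>& items, size_t desiredTotalBytes,
                       ProcessFn process, YieldFn yieldToScheduler) {
	size_t yieldBytes = 0;
	for (const Item& item : items) {
		if (yieldBytes > desiredTotalBytes) {
			yieldBytes = 0;
			yieldToScheduler(); // let other tasks on the event loop run
		}
		yieldBytes += item.size(); // analogous to expectedSize() above
		process(item);
	}
}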
@@ -1040,7 +1046,9 @@ ACTOR Future<Void> commitBatch(
ACTOR Future<Void> updateLastCommit(ProxyCommitData* self, Optional<UID> debugID = Optional<UID>()) {
	state double confirmStart = now();
	self->lastStartCommit = confirmStart;
+	self->updateCommitRequests++;
	wait(self->logSystem->confirmEpochLive(debugID));
+	self->updateCommitRequests--;
	self->lastCommitLatency = now()-confirmStart;
	self->lastCommitTime = std::max(self->lastCommitTime.get(), confirmStart);
	return Void();
@@ -1448,7 +1456,12 @@ ACTOR Future<Void> lastCommitUpdater(ProxyCommitData* self, PromiseStream<Future
		if(elapsed < interval) {
			wait( delay(interval + 0.0001 - elapsed) );
		} else {
-			addActor.send(updateLastCommit(self));
+			if(self->updateCommitRequests < SERVER_KNOBS->MAX_COMMIT_UPDATES) {
+				addActor.send(updateLastCommit(self));
+			} else {
+				TraceEvent(g_network->isSimulated() ? SevInfo : SevWarnAlways, "TooManyLastCommitUpdates").suppressFor(1.0);
+				self->lastStartCommit = now();
+			}
		}
	}
}
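
The new MAX_COMMIT_UPDATES knob turns updateLastCommit into a bounded pool: the counter is bumped before waiting on confirmEpochLive and dropped afterwards, and lastCommitUpdater refuses to queue more work past the bound. A minimal sketch of the same bounded in-flight counter in plain C++ (names illustrative):

// Bounded in-flight counter. The RAII guard keeps the decrement paired with
// the increment; the proxy code does both manually because the wait happens
// inside a Flow actor rather than an ordinary C++ scope.
struct InFlightGuard {
	int& counter;
	explicit InFlightGuard(int& c) : counter(c) { ++counter; }
	~InFlightGuard() { --counter; }
};

// Mirrors the MAX_COMMIT_UPDATES check in lastCommitUpdater.
inline bool canStartUpdate(int inFlight, int maxInFlight) {
	return inFlight < maxInFlight;
}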

View File

@@ -697,7 +697,7 @@ ACTOR Future<Void> configurationMonitor(Reference<AsyncVar<ServerDBInfo>> dbInfo
				conf->fromKeyValues( (VectorRef<KeyValueRef>) results );

-				state Future<Void> watchFuture = tr.watch(moveKeysLockOwnerKey);
+				state Future<Void> watchFuture = tr.watch(moveKeysLockOwnerKey) || tr.watch(excludedServersVersionKey);
				wait( tr.commit() );
				wait( watchFuture );
				break;
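
Both configuration monitors in this diff (here and in masterserver.cpp below) now build their watch with Flow's `||` combinator, which yields a future that fires when either watch fires. A sketch of the idiom, assuming Flow's actor compiler and the transaction API used above; this is not standalone C++:

// Sketch only: relies on Flow's ACTOR transform and Future combinators.
// operator|| returns a Future<Void> that becomes ready as soon as either
// operand does; the watches only arm once the transaction commits.
ACTOR Future<Void> watchEitherKey(Transaction* tr, Key a, Key b) {
	state Future<Void> watchFuture = tr->watch(a) || tr->watch(b);
	wait(tr->commit());
	wait(watchFuture); // wakes on a change to either key
	return Void();
}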

View File

@@ -945,11 +945,8 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
		}
	}

-	if (deterministicRandom()->random01() < 0.25) {
-		int logs = deterministicRandom()->randomInt(1,7);
-		primaryObj["satellite_logs"] = logs;
-		remoteObj["satellite_logs"] = logs;
-	}
+	if (deterministicRandom()->random01() < 0.25) primaryObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);
+	if (deterministicRandom()->random01() < 0.25) remoteObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);

	// We cannot run with a remote DC when MAX_READ_TRANSACTION_LIFE_VERSIONS is too small, because the log routers will not be able to keep up.
	if (minimumRegions <= 1 && (deterministicRandom()->random01() < 0.25 || SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < SERVER_KNOBS->VERSIONS_PER_SECOND)) {
@@ -998,12 +995,14 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
		primarySatelliteObj["id"] = useNormalDCsAsSatellites ? "1" : "2";
		primarySatelliteObj["priority"] = 1;
		primarySatelliteObj["satellite"] = 1;
+		if (deterministicRandom()->random01() < 0.25) primarySatelliteObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);
		primaryDcArr.push_back(primarySatelliteObj);

		StatusObject remoteSatelliteObj;
		remoteSatelliteObj["id"] = useNormalDCsAsSatellites ? "0" : "3";
		remoteSatelliteObj["priority"] = 1;
		remoteSatelliteObj["satellite"] = 1;
+		if (deterministicRandom()->random01() < 0.25) remoteSatelliteObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);
		remoteDcArr.push_back(remoteSatelliteObj);

		if (datacenters > 4) {
@@ -1011,12 +1010,14 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
			primarySatelliteObjB["id"] = useNormalDCsAsSatellites ? "2" : "4";
			primarySatelliteObjB["priority"] = 1;
			primarySatelliteObjB["satellite"] = 1;
+			if (deterministicRandom()->random01() < 0.25) primarySatelliteObjB["satellite_logs"] = deterministicRandom()->randomInt(1,7);
			primaryDcArr.push_back(primarySatelliteObjB);

			StatusObject remoteSatelliteObjB;
			remoteSatelliteObjB["id"] = useNormalDCsAsSatellites ? "2" : "5";
			remoteSatelliteObjB["priority"] = 1;
			remoteSatelliteObjB["satellite"] = 1;
+			if (deterministicRandom()->random01() < 0.25) remoteSatelliteObjB["satellite_logs"] = deterministicRandom()->randomInt(1,7);
			remoteDcArr.push_back(remoteSatelliteObjB);
		}
		if (useNormalDCsAsSatellites) {

View File

@@ -1122,33 +1122,102 @@ ACTOR static Future<JsonBuilderObject> latencyProbeFetcher(Database cx, JsonBuil
	return statusObj;
}

-ACTOR static Future<Void> consistencyCheckStatusFetcher(Database cx, JsonBuilderArray *messages, std::set<std::string> *incomplete_reasons, bool isAvailable) {
+ACTOR static Future<Void> consistencyCheckStatusFetcher(Database cx, JsonBuilderArray *messages, std::set<std::string> *incomplete_reasons) {
-	if(isAvailable) {
	try {
		state Transaction tr(cx);
		loop {
			try {
				tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
				tr.setOption(FDBTransactionOptions::LOCK_AWARE);
				tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
				Optional<Value> ccSuspendVal = wait(timeoutError(BUGGIFY ? Never() : tr.get(fdbShouldConsistencyCheckBeSuspended), 5.0));
				bool ccSuspend = ccSuspendVal.present() ? BinaryReader::fromStringRef<bool>(ccSuspendVal.get(), Unversioned()) : false;
				if(ccSuspend) {
					messages->push_back(JsonString::makeMessage("consistencycheck_disabled", "Consistency checker is disabled."));
				}
				break;
			} catch(Error &e) {
				if(e.code() == error_code_timed_out) {
					messages->push_back(JsonString::makeMessage("consistencycheck_suspendkey_fetch_timeout",
						format("Timed out trying to fetch `%s` from the database.", printable(fdbShouldConsistencyCheckBeSuspended).c_str()).c_str()));
					break;
				}
				wait(tr.onError(e));
			}
		}
	} catch(Error &e) {
		incomplete_reasons->insert(format("Unable to retrieve consistency check settings (%s).", e.what()));
	}
-	}
	return Void();
}
+struct LogRangeAndUID {
+	KeyRange range;
+	UID destID;
+
+	LogRangeAndUID(KeyRange const& range, UID const& destID) : range(range), destID(destID) {}
+
+	bool operator < (LogRangeAndUID const& r) const {
+		if(range.begin != r.range.begin) return range.begin < r.range.begin;
+		if(range.end != r.range.end) return range.end < r.range.end;
+		return destID < r.destID;
+	}
+};
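
LogRangeAndUID's operator< is a hand-written lexicographic compare over (range.begin, range.end, destID), which std::set requires to be a strict weak ordering. For comparison only (the patch keeps the explicit form), the same ordering can be expressed with std::tie:

// Equivalent strict weak ordering via std::tie; an alternative spelling,
// not what the patch compiles.
#include <tuple>

inline bool lessByRangeThenUid(LogRangeAndUID const& l, LogRangeAndUID const& r) {
	return std::tie(l.range.begin, l.range.end, l.destID)
	     < std::tie(r.range.begin, r.range.end, r.destID);
}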
+ACTOR static Future<Void> logRangeWarningFetcher(Database cx, JsonBuilderArray *messages, std::set<std::string> *incomplete_reasons) {
+	try {
+		state Transaction tr(cx);
+		state Future<Void> timeoutFuture = timeoutError(Future<Void>(Never()), 5.0);
+		loop {
+			try {
+				tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+				tr.setOption(FDBTransactionOptions::LOCK_AWARE);
+				tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+
+				state Future<Standalone<RangeResultRef>> existingDestUidValues = tr.getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY);
+				state Future<Standalone<RangeResultRef>> existingLogRanges = tr.getRange(logRangesRange, CLIENT_KNOBS->TOO_MANY);
+				wait( (success(existingDestUidValues) && success(existingLogRanges)) || timeoutFuture );
+
+				std::set<LogRangeAndUID> loggingRanges;
+				for(auto& it : existingLogRanges.get()) {
+					Key logDestination;
+					UID logUid;
+					KeyRef logRangeBegin = logRangesDecodeKey(it.key, &logUid);
+					Key logRangeEnd = logRangesDecodeValue(it.value, &logDestination);
+					loggingRanges.insert(LogRangeAndUID(KeyRangeRef(logRangeBegin, logRangeEnd), logUid));
+				}
+
+				std::set<std::pair<Key,Key>> existingRanges;
+				for(auto& it : existingDestUidValues.get()) {
+					KeyRange range = BinaryReader::fromStringRef<KeyRange>(it.key.removePrefix(destUidLookupPrefix), IncludeVersion());
+					UID logUid = BinaryReader::fromStringRef<UID>(it.value, Unversioned());
+					if(loggingRanges.count(LogRangeAndUID(range, logUid))) {
+						std::pair<Key,Key> rangePair = std::make_pair(range.begin, range.end);
+						if(existingRanges.count(rangePair)) {
+							messages->push_back(JsonString::makeMessage("duplicate_mutation_streams", format("Backup and DR are not sharing the same stream of mutations for `%s` - `%s`", printable(range.begin).c_str(), printable(range.end).c_str()).c_str()));
+							break;
+						}
+						existingRanges.insert(rangePair);
+					} else {
+						// This cleanup is done during status, because it should only be required once after upgrading to 6.2.7 or later.
+						// There is no other good location to detect that the metadata is mismatched.
+						TraceEvent(SevWarnAlways, "CleaningDestUidLookup").detail("K", it.key.printable()).detail("V", it.value.printable());
+						tr.clear(it.key);
+					}
+				}
+				wait(tr.commit() || timeoutFuture);
+				break;
+			} catch(Error &e) {
+				if(e.code() == error_code_timed_out) {
+					messages->push_back(JsonString::makeMessage("duplicate_mutation_fetch_timeout",
+						format("Timed out trying to fetch `%s` from the database.", printable(destUidLookupPrefix).c_str()).c_str()));
+					break;
+				}
+				wait(tr.onError(e));
+			}
+		}
+	} catch(Error &e) {
+		incomplete_reasons->insert(format("Unable to retrieve log ranges (%s).", e.what()));
+	}
+	return Void();
+}
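
The fetcher flags a duplicate when two live destUid entries cover the same key range, meaning backup and DR are each writing their own copy of that range's mutation stream instead of sharing one. The core check, reduced to a self-contained sketch with simplified stand-in types:

// Returns every range that appears more than once among live destUid
// entries; each such range has two mutation streams being logged for it.
#include <set>
#include <string>
#include <utility>
#include <vector>

using Range = std::pair<std::string, std::string>; // [begin, end) as printable keys

std::vector<Range> findDuplicateStreams(std::vector<Range> const& destUidEntries) {
	std::set<Range> seen;
	std::vector<Range> duplicates;
	for (Range const& r : destUidEntries) {
		if (!seen.insert(r).second)
			duplicates.push_back(r); // second sighting of the same range
	}
	return duplicates;
}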
@@ -1274,7 +1343,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
	return statusObj;
}

-ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker, int *minReplicasRemaining) {
+ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker, DatabaseConfiguration configuration, int *minReplicasRemaining) {
	state JsonBuilderObject statusObjData;

	try {
@@ -1328,6 +1397,7 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
		if (dataStats.size())
		{
			statusObjData.setKeyRawNumber("total_kv_size_bytes",dataStats.getValue("TotalSizeBytes"));
+			statusObjData.setKeyRawNumber("system_kv_size_bytes",dataStats.getValue("SystemSizeBytes"));
			statusObjData.setKeyRawNumber("partitions_count",dataStats.getValue("Shards"));
		}
@@ -1338,13 +1408,14 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
				continue;
			}

+			int replicas = configuration.storageTeamSize;
			bool primary = inFlight.getInt("Primary");
			int highestPriority = inFlight.getInt("HighestPriority");
-			if (movingHighestPriority < PRIORITY_TEAM_REDUNDANT) {
+			if (movingHighestPriority < SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT) {
				highestPriority = movingHighestPriority;
			} else if (partitionsInFlight > 0) {
-				highestPriority = std::max<int>(highestPriority, PRIORITY_MERGE_SHARD);
+				highestPriority = std::max<int>(highestPriority, SERVER_KNOBS->PRIORITY_MERGE_SHARD);
			}

			JsonBuilderObject team_tracker;
@@ -1353,53 +1424,47 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
			team_tracker.setKeyRawNumber("unhealthy_servers",inFlight.getValue("UnhealthyServers"));

			JsonBuilderObject stateSectionObj;
-			if (highestPriority >= PRIORITY_TEAM_0_LEFT) {
+			if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
				stateSectionObj["healthy"] = false;
				stateSectionObj["name"] = "missing_data";
				stateSectionObj["description"] = "No replicas remain of some data";
				stateSectionObj["min_replicas_remaining"] = 0;
-				if(primary) {
-					*minReplicasRemaining = 0;
-				}
+				replicas = 0;
			}
-			else if (highestPriority >= PRIORITY_TEAM_1_LEFT) {
+			else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_1_LEFT) {
				stateSectionObj["healthy"] = false;
				stateSectionObj["name"] = "healing";
				stateSectionObj["description"] = "Only one replica remains of some data";
				stateSectionObj["min_replicas_remaining"] = 1;
-				if(primary) {
-					*minReplicasRemaining = 1;
-				}
+				replicas = 1;
			}
-			else if (highestPriority >= PRIORITY_TEAM_2_LEFT) {
+			else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) {
				stateSectionObj["healthy"] = false;
				stateSectionObj["name"] = "healing";
				stateSectionObj["description"] = "Only two replicas remain of some data";
				stateSectionObj["min_replicas_remaining"] = 2;
-				if(primary) {
-					*minReplicasRemaining = 2;
-				}
+				replicas = 2;
			}
-			else if (highestPriority >= PRIORITY_TEAM_UNHEALTHY) {
+			else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) {
				stateSectionObj["healthy"] = false;
				stateSectionObj["name"] = "healing";
				stateSectionObj["description"] = "Restoring replication factor";
-			} else if (highestPriority >= PRIORITY_MERGE_SHARD) {
+			} else if (highestPriority >= SERVER_KNOBS->PRIORITY_MERGE_SHARD) {
				stateSectionObj["healthy"] = true;
				stateSectionObj["name"] = "healthy_repartitioning";
				stateSectionObj["description"] = "Repartitioning.";
-			} else if (highestPriority >= PRIORITY_TEAM_REDUNDANT) {
+			} else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_REDUNDANT) {
				stateSectionObj["healthy"] = true;
				stateSectionObj["name"] = "optimizing_team_collections";
				stateSectionObj["description"] = "Optimizing team collections";
-			} else if (highestPriority >= PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER) {
+			} else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER) {
				stateSectionObj["healthy"] = true;
				stateSectionObj["name"] = "healthy_removing_server";
				stateSectionObj["description"] = "Removing storage server";
-			} else if (highestPriority == PRIORITY_TEAM_HEALTHY) {
+			} else if (highestPriority == SERVER_KNOBS->PRIORITY_TEAM_HEALTHY) {
				stateSectionObj["healthy"] = true;
				stateSectionObj["name"] = "healthy";
-			} else if (highestPriority >= PRIORITY_REBALANCE_SHARD) {
+			} else if (highestPriority >= SERVER_KNOBS->PRIORITY_RECOVER_MOVE) {
				stateSectionObj["healthy"] = true;
				stateSectionObj["name"] = "healthy_rebalancing";
				stateSectionObj["description"] = "Rebalancing";
@@ -1415,6 +1480,13 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
					statusObjData["state"] = stateSectionObj;
				}
			}
+			if(primary) {
+				*minReplicasRemaining = std::max(*minReplicasRemaining, 0) + replicas;
+			}
+			else if(replicas > 0) {
+				*minReplicasRemaining = std::max(*minReplicasRemaining, 0) + 1;
+			}
		}
		statusObjData["team_trackers"] = teamTrackers;
	}
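
The replica accounting changes from "primary only" to a sum across regions: the primary contributes its surviving replica count, and a remote region that still has data contributes one more. A worked example under triple replication (illustrative numbers, following the arithmetic in the hunk above):

#include <algorithm>
#include <cassert>

int main() {
	int minReplicasRemaining = -1;              // initial sentinel, as in clusterGetStatus
	// Primary region healing with 2 replicas left:
	minReplicasRemaining = std::max(minReplicasRemaining, 0) + 2;  // -> 2
	// A remote region with data (replicas > 0) adds one more copy:
	minReplicasRemaining = std::max(minReplicasRemaining, 0) + 1;  // -> 3
	assert(minReplicasRemaining == 3);
	return 0;
}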
@@ -2219,7 +2291,13 @@ ACTOR Future<StatusReply> clusterGetStatus(
			statusObj["latency_probe"] = latencyProbeResults;
		}

-		wait(consistencyCheckStatusFetcher(cx, &messages, &status_incomplete_reasons, isAvailable));
+		state std::vector<Future<Void>> warningFutures;
+		if(isAvailable) {
+			warningFutures.push_back( consistencyCheckStatusFetcher(cx, &messages, &status_incomplete_reasons) );
+			if(!SERVER_KNOBS->DISABLE_DUPLICATE_LOG_WARNING) {
+				warningFutures.push_back( logRangeWarningFetcher(cx, &messages, &status_incomplete_reasons) );
+			}
+		}

		// Start getting storage servers now (using system priority) concurrently. Using sys priority because having storage servers
		// in status output is important to give context to error messages in status that reference a storage server role ID.
@@ -2234,7 +2312,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
		state int minReplicasRemaining = -1;
		std::vector<Future<JsonBuilderObject>> futures2;
-		futures2.push_back(dataStatusFetcher(ddWorker, &minReplicasRemaining));
+		futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
		futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
		futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
		futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
@@ -2313,6 +2391,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
		else {
			messages.push_back(JsonBuilder::makeMessage("proxies_error", "Timed out trying to retrieve proxies."));
		}
+		wait( waitForAll(warningFutures) );
	}
	else {
		// Set layers status to { _valid: false, error: "configurationMissing"}
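
Instead of blocking on the consistency-check fetch up front, the status path now starts both warning fetchers early and only joins them with waitForAll once the rest of the status document is assembled. A hedged sketch of that start-early, join-late shape with std::async standing in for Flow actors (names illustrative):

// Start-early, join-late: both checks overlap the rest of the work rather
// than running serially before it.
#include <future>
#include <vector>

void buildStatus() {
	std::vector<std::future<void>> warningFutures;
	warningFutures.push_back(std::async(std::launch::async, [] { /* consistency check status */ }));
	warningFutures.push_back(std::async(std::launch::async, [] { /* duplicate log-range warning */ }));

	/* ... assemble the rest of the status document ... */

	for (auto& f : warningFutures) f.get(); // join before returning
}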

View File

@@ -1213,7 +1213,7 @@ ACTOR Future<Void> configurationMonitor( Reference<MasterData> self ) {
				self->registrationTrigger.trigger();
			}

-			state Future<Void> watchFuture = tr.watch(moveKeysLockOwnerKey);
+			state Future<Void> watchFuture = tr.watch(moveKeysLockOwnerKey) || tr.watch(excludedServersVersionKey);
			wait(tr.commit());
			wait(watchFuture);
			break;

View File

@@ -106,6 +106,7 @@ struct AddingShard : NonCopyable {
	Version transferredVersion;

	enum Phase { WaitPrevious, Fetching, Waiting };
	Phase phase;

	AddingShard( StorageServer* server, KeyRangeRef const& keys );
@@ -1948,8 +1949,9 @@ void splitMutation(StorageServer* data, KeyRangeMap<T>& map, MutationRef const&
ACTOR Future<Void> logFetchKeysWarning(AddingShard* shard) {
	state double startTime = now();
	loop {
-		wait(delay(600));
-		TraceEvent(SevWarnAlways, "FetchKeysTooLong").detail("Duration", now() - startTime).detail("Phase", shard->phase).detail("Begin", shard->keys.begin.printable()).detail("End", shard->keys.end.printable());
+		state double waitSeconds = BUGGIFY ? 5.0 : 600.0;
+		wait(delay(waitSeconds));
+		TraceEvent(waitSeconds > 300.0 ? SevWarnAlways : SevInfo, "FetchKeysTooLong").detail("Duration", now() - startTime).detail("Phase", shard->phase).detail("Begin", shard->keys.begin.printable()).detail("End", shard->keys.end.printable());
	}
}
@@ -2068,6 +2070,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
				shard->server->addShard( ShardInfo::addingSplitLeft( KeyRangeRef(keys.begin, nfk), shard ) );
				shard->server->addShard( ShardInfo::newAdding( data, KeyRangeRef(nfk, keys.end) ) );
				shard = data->shards.rangeContaining( keys.begin ).value()->adding;
+				warningLogger = logFetchKeysWarning(shard);

				AddingShard* otherShard = data->shards.rangeContaining( nfk ).value()->adding;
				keys = shard->keys;
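
BUGGIFY shortens the warning delay in simulation so the rare FetchKeysTooLong path actually gets exercised, while the severity check keeps the shortened timer from emitting SevWarnAlways noise. The shape of the pattern, reduced to plain C++ (the names and the print stand-in are illustrative, not the Flow API):

#include <cstdio>

static bool buggifyEnabled = false; // in simulation this is randomly true

double fetchWarnDelay() { return buggifyEnabled ? 5.0 : 600.0; }

void logSlowFetch(double waited) {
	// Only a genuinely long wait deserves the always-logged severity.
	std::printf("%s: FetchKeysTooLong after %.0fs\n",
	            waited > 300.0 ? "SevWarnAlways" : "SevInfo", waited);
}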

View File

@@ -75,12 +75,14 @@ std::string generateRegions() {
		primarySatelliteObj["id"] = "2";
		primarySatelliteObj["priority"] = 1;
		primarySatelliteObj["satellite"] = 1;
+		if (deterministicRandom()->random01() < 0.25) primarySatelliteObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);
		primaryDcArr.push_back(primarySatelliteObj);

		StatusObject remoteSatelliteObj;
		remoteSatelliteObj["id"] = "3";
		remoteSatelliteObj["priority"] = 1;
		remoteSatelliteObj["satellite"] = 1;
+		if (deterministicRandom()->random01() < 0.25) remoteSatelliteObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);
		remoteDcArr.push_back(remoteSatelliteObj);

		if(g_simulator.physicalDatacenters > 5 && deterministicRandom()->random01() < 0.5) {
@@ -88,12 +90,14 @@ std::string generateRegions() {
			primarySatelliteObjB["id"] = "4";
			primarySatelliteObjB["priority"] = 1;
			primarySatelliteObjB["satellite"] = 1;
+			if (deterministicRandom()->random01() < 0.25) primarySatelliteObjB["satellite_logs"] = deterministicRandom()->randomInt(1,7);
			primaryDcArr.push_back(primarySatelliteObjB);

			StatusObject remoteSatelliteObjB;
			remoteSatelliteObjB["id"] = "5";
			remoteSatelliteObjB["priority"] = 1;
			remoteSatelliteObjB["satellite"] = 1;
+			if (deterministicRandom()->random01() < 0.25) remoteSatelliteObjB["satellite_logs"] = deterministicRandom()->randomInt(1,7);
			remoteDcArr.push_back(remoteSatelliteObjB);
		}

		int satellite_replication_type = deterministicRandom()->randomInt(0,3);
@@ -146,11 +150,8 @@ std::string generateRegions() {
		}
	}

-	if (deterministicRandom()->random01() < 0.25) {
-		int logs = deterministicRandom()->randomInt(1,7);
-		primaryObj["satellite_logs"] = logs;
-		remoteObj["satellite_logs"] = logs;
-	}
+	if (deterministicRandom()->random01() < 0.25) primaryObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);
+	if (deterministicRandom()->random01() < 0.25) remoteObj["satellite_logs"] = deterministicRandom()->randomInt(1,7);

	int remote_replication_type = deterministicRandom()->randomInt(0, 4);
	switch (remote_replication_type) {

View File

@@ -44,7 +44,7 @@ struct DDMetricsWorkload : TestWorkload {
		TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
			EventLogRequest( LiteralStringRef( "MovingData" ) ) ), 1.0 ) );
		int relocations;
-		sscanf(md.getValue("HighPriorityRelocations").c_str(), "%d", &relocations);
+		sscanf(md.getValue("UnhealthyRelocations").c_str(), "%d", &relocations);
		return relocations;
	}

View File

@@ -67,7 +67,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
	init( MAX_RECONNECTION_TIME, 0.5 );
	init( RECONNECTION_TIME_GROWTH_RATE, 1.2 );
	init( RECONNECTION_RESET_TIME, 5.0 );
-	init( CONNECTION_ACCEPT_DELAY, 0.01 );
+	init( CONNECTION_ACCEPT_DELAY, 0.5 );
	init( USE_OBJECT_SERIALIZER, 1 );
	init( TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY, 5.0 );
	init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 );
@@ -212,6 +212,7 @@ bool Knobs::setKnob( std::string const& knob, std::string const& value ) {
			}
			*bool_knobs[knob] = v;
		}
+		return true;
	}
	if (int64_knobs.count(knob) || int_knobs.count(knob)) {
		int64_t v;
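
The added `return true;` fixes a fall-through: a successfully parsed bool knob previously continued into the integer branch and was parsed again. A minimal repro of the dispatch shape (simplified types, not the real Knobs class):

// Each branch must return once it has claimed the knob; without the early
// return, a matched bool knob falls through to the integer parser.
#include <cstdio>
#include <map>
#include <string>

static std::map<std::string, bool> bool_knobs;
static std::map<std::string, long> int_knobs;

bool setKnob(const std::string& knob, const std::string& value) {
	if (bool_knobs.count(knob)) {
		bool_knobs[knob] = (value == "true");
		return true; // the fix: stop here instead of falling through
	}
	if (int_knobs.count(knob)) {
		return std::sscanf(value.c_str(), "%ld", &int_knobs[knob]) == 1;
	}
	return false; // unknown knob
}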

View File

@@ -524,6 +524,7 @@ inline static void aligned_free(void* ptr) { free(ptr); }
inline static void* aligned_alloc(size_t alignment, size_t size) { return memalign(alignment, size); }
#endif
#elif defined(__APPLE__)
+#if !defined(HAS_ALIGNED_ALLOC)
#include <cstdlib>
inline static void* aligned_alloc(size_t alignment, size_t size) {
	// Linux's aligned_alloc() requires alignment to be a power of 2. While posix_memalign()
@@ -540,6 +541,7 @@ inline static void* aligned_alloc(size_t alignment, size_t size) {
	posix_memalign(&ptr, alignment, size);
	return ptr;
}
+#endif
inline static void aligned_free(void* ptr) { free(ptr); }
#endif
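
The new HAS_ALIGNED_ALLOC guard keeps this shim from colliding with macOS SDKs that already provide aligned_alloc. Either way the contract is the same; a small usage sketch on POSIX systems (sizes illustrative):

// posix_memalign requires the alignment to be a power of two and a
// multiple of sizeof(void*); the shim above relies on the same contract.
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
	void* p = nullptr;
	if (posix_memalign(&p, 4096, 8192) != 0) return 1;
	assert(reinterpret_cast<uintptr_t>(p) % 4096 == 0); // page-aligned
	free(p); // aligned_free() is plain free() on these platforms
	return 0;
}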

View File

@@ -79,6 +79,7 @@ public: // introduced features
	PROTOCOL_VERSION_FEATURE(0x0FDB00A400040000LL, OpenDatabase);
	PROTOCOL_VERSION_FEATURE(0x0FDB00A446020000LL, Locality);
	PROTOCOL_VERSION_FEATURE(0x0FDB00A460010000LL, MultiGenerationTLog);
+	PROTOCOL_VERSION_FEATURE(0x0FDB00A460010000LL, SharedMutations);
	PROTOCOL_VERSION_FEATURE(0x0FDB00A551000000LL, MultiVersionClient);
	PROTOCOL_VERSION_FEATURE(0x0FDB00A560010000LL, TagLocality);
	PROTOCOL_VERSION_FEATURE(0x0FDB00B060000000LL, Fearless);
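
Each PROTOCOL_VERSION_FEATURE entry pins a feature to the protocol version that introduced it, so code can ask a peer's version whether a feature is safe to use; the real macro generates accessors along these lines. A hedged reduction of the idiom using constants from this hunk:

// A feature is "present" iff the peer's protocol version is at least the
// version that introduced it. Sketch only; not the actual macro expansion.
#include <cassert>
#include <cstdint>

struct ProtocolVersion {
	uint64_t version;
	bool hasSharedMutations() const { return version >= 0x0FDB00A460010000ULL; }
	bool hasMultiVersionClient() const { return version >= 0x0FDB00A551000000ULL; }
};

int main() {
	assert(ProtocolVersion{0x0FDB00B060000000ULL}.hasSharedMutations());   // Fearless-era peer
	assert(!ProtocolVersion{0x0FDB00A400040000ULL}.hasMultiVersionClient()); // OpenDatabase-era peer
	return 0;
}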

View File

@@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
	<Product Name='$(var.Title)'
-		Id='{6BAF0715-4FDE-4F0D-8CA7-E4CAD53519B8}'
+		Id='{B69CF2EA-9CDC-4373-83E6-3615F9AE393B}'
		UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
		Version='$(var.Version)'
		Manufacturer='$(var.Manufacturer)'

View File

@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
	<PropertyGroup>
-		<Version>6.2.5</Version>
+		<Version>6.2.7</Version>
		<PackageName>6.2</PackageName>
	</PropertyGroup>
</Project>